fix errors
This commit is contained in:
@@ -42,9 +42,9 @@
|
|||||||
" \"batch_size\": 32,\n",
|
" \"batch_size\": 32,\n",
|
||||||
" \"train_split\": 0.8,\n",
|
" \"train_split\": 0.8,\n",
|
||||||
" \"num_workers\": 0,\n",
|
" \"num_workers\": 0,\n",
|
||||||
" \"epochs\": 100,\n",
|
" \"epochs\": 10,\n",
|
||||||
" \"learning_rate\": 2e-4,\n",
|
" \"learning_rate\": 2e-4,\n",
|
||||||
" \"dropout_rate\": 0.3,\n",
|
" \"dropout_rate\": 0.5,\n",
|
||||||
" \"backbone\": \"resnet18\",\n",
|
" \"backbone\": \"resnet18\",\n",
|
||||||
" \"output_dir\": r\"C:\\Users\\admin\\Projects\\autopilot\\models\\SiaN\\runs\",\n",
|
" \"output_dir\": r\"C:\\Users\\admin\\Projects\\autopilot\\models\\SiaN\\runs\",\n",
|
||||||
" \"save_every_n_epochs\": 15,\n",
|
" \"save_every_n_epochs\": 15,\n",
|
||||||
@@ -62,11 +62,11 @@
|
|||||||
" rx = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
" rx = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
||||||
" ry = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
" ry = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
||||||
" rz = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
" rz = np.radians(np.random.uniform(-angle_range, angle_range))\n",
|
||||||
" return np.array([rx, ry, rz, tx, ty, scale])\n",
|
" return np.array([tx, ty, rx, ry, rz, scale])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def homography_params_to_matrix(params, K):\n",
|
"def homography_params_to_matrix(params, K):\n",
|
||||||
" rx, ry, rz, tx, ty, scale = params\n",
|
" tx, ty, rx, ry, rz, scale = params\n",
|
||||||
" cy, sy = np.cos(rz), np.sin(rz)\n",
|
" cy, sy = np.cos(rz), np.sin(rz)\n",
|
||||||
" cp, sp = np.cos(ry), np.sin(ry)\n",
|
" cp, sp = np.cos(ry), np.sin(ry)\n",
|
||||||
" cr, sr = np.cos(rx), np.sin(rx)\n",
|
" cr, sr = np.cos(rx), np.sin(rx)\n",
|
||||||
@@ -87,7 +87,7 @@
|
|||||||
" r20, r21 = E[2, 0], E[2, 1]\n",
|
" r20, r21 = E[2, 0], E[2, 1]\n",
|
||||||
" ry = np.arctan2(r20, r21)\n",
|
" ry = np.arctan2(r20, r21)\n",
|
||||||
" rx = np.arctan2(-E[1, 2], E[1, 1])\n",
|
" rx = np.arctan2(-E[1, 2], E[1, 1])\n",
|
||||||
" return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32)\n",
|
" return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)\n",
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -189,7 +189,7 @@
|
|||||||
" else:\n",
|
" else:\n",
|
||||||
" google_img = self._google_images[idx]\n",
|
" google_img = self._google_images[idx]\n",
|
||||||
" yandex_img = self._yandex_images[idx]\n",
|
" yandex_img = self._yandex_images[idx]\n",
|
||||||
" target_params = np.zeros(6, dtype=np.float32)\n",
|
" target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32)\n",
|
||||||
" target_matrix = np.eye(3, dtype=np.float32)\n",
|
" target_matrix = np.eye(3, dtype=np.float32)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" google_img = Image.fromarray(google_img)\n",
|
" google_img = Image.fromarray(google_img)\n",
|
||||||
@@ -249,6 +249,12 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def angular_difference(pred_angles, target_angles):\n",
|
||||||
|
" diff = pred_angles - target_angles\n",
|
||||||
|
" diff = torch.atan2(torch.sin(diff), torch.cos(diff))\n",
|
||||||
|
" return torch.abs(diff)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"class HomographyCNN6(nn.Module):\n",
|
"class HomographyCNN6(nn.Module):\n",
|
||||||
@@ -263,31 +269,53 @@
|
|||||||
" nn.Linear(self.feature_dim * 4, 512),\n",
|
" nn.Linear(self.feature_dim * 4, 512),\n",
|
||||||
" nn.ReLU(inplace=True),\n",
|
" nn.ReLU(inplace=True),\n",
|
||||||
" nn.Dropout(dropout_rate),\n",
|
" nn.Dropout(dropout_rate),\n",
|
||||||
|
" nn.Linear(1024, 512),\n",
|
||||||
|
" nn.ReLU(inplace=True),\n",
|
||||||
|
" nn.Dropout(dropout_rate),\n",
|
||||||
" nn.Linear(512, 256),\n",
|
" nn.Linear(512, 256),\n",
|
||||||
" nn.ReLU(inplace=True),\n",
|
" nn.ReLU(inplace=True),\n",
|
||||||
" nn.Dropout(dropout_rate),\n",
|
" nn.Dropout(dropout_rate),\n",
|
||||||
" nn.Linear(256, 9),\n",
|
" nn.Linear(512, 9),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" def _normalize_sin_cos(self, _sin, _cos):\n",
|
||||||
|
" _len = torch.sqrt(_sin ** 2 + _cos ** 2)\n",
|
||||||
|
" return _sin / _len, _cos / _len\n",
|
||||||
|
"\n",
|
||||||
" def forward(self, img1, img2):\n",
|
" def forward(self, img1, img2):\n",
|
||||||
" f1 = self.backbone(img1)\n",
|
" f1 = self.backbone(img1)\n",
|
||||||
" f2 = self.backbone(img2)\n",
|
" f2 = self.backbone(img2)\n",
|
||||||
" combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)\n",
|
" combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)\n",
|
||||||
|
"\n",
|
||||||
|
" combined[:, (0, 1)] = torch.tanh(combined[:, (0, 1)]) * 10 # [-10; 10]\n",
|
||||||
|
" combined[:, (2, 3)] = self._normalize_sin_cos(torch.tanh(combined[:, 2]), torch.tanh(combined[:, 3]))\n",
|
||||||
|
" combined[:, (4, 5)] = self._normalize_sin_cos(torch.tanh(combined[:, 4]), torch.tanh(combined[:, 5]))\n",
|
||||||
|
" combined[:, (6, 7)] = self._normalize_sin_cos(torch.tanh(combined[:, 6]), torch.tanh(combined[:, 7]))\n",
|
||||||
|
" \n",
|
||||||
" return self.head(combined)\n",
|
" return self.head(combined)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" def decode_output(self, output):\n",
|
" def decode_output(self, output):\n",
|
||||||
" tx, ty = output[:, 0], output[:, 1]\n",
|
" tx = output[:, 0]\n",
|
||||||
" sin1, cos1 = torch.tanh(output[:, 2]), torch.tanh(output[:, 3])\n",
|
" ty = output[:, 1]\n",
|
||||||
" sin2, cos2 = torch.tanh(output[:, 4]), torch.tanh(output[:, 5])\n",
|
|
||||||
" sin3, cos3 = torch.tanh(output[:, 6]), torch.tanh(output[:, 7])\n",
|
|
||||||
" scale = output[:, 8]\n",
|
" scale = output[:, 8]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" angle1 = torch.atan2(sin1, cos1)\n",
|
" angle1 = torch.atan2(output[:, 2], output[:, 3])\n",
|
||||||
" angle2 = torch.atan2(sin2, cos2)\n",
|
" angle2 = torch.atan2(output[:, 4], output[:, 5])\n",
|
||||||
" angle3 = torch.atan2(sin3, cos3)\n",
|
" angle3 = torch.atan2(output[:, 6], output[:, 7])\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1)\n",
|
" return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" def get_components(self, output):\n",
|
||||||
|
" decoded = self.decode_output(output)\n",
|
||||||
|
" return {\n",
|
||||||
|
" \"tx\": decoded[:, 0],\n",
|
||||||
|
" \"ty\": decoded[:, 1],\n",
|
||||||
|
" \"rx\": decoded[:, 2],\n",
|
||||||
|
" \"ry\": decoded[:, 3],\n",
|
||||||
|
" \"rz\": decoded[:, 4],\n",
|
||||||
|
" \"scale\": decoded[:, 5],\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"class HomographyLoss6(nn.Module):\n",
|
"class HomographyLoss6(nn.Module):\n",
|
||||||
" def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0):\n",
|
" def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0):\n",
|
||||||
@@ -301,63 +329,50 @@
|
|||||||
" tx_loss = self.criterion(pred[:, 0], target[:, 0])\n",
|
" tx_loss = self.criterion(pred[:, 0], target[:, 0])\n",
|
||||||
" ty_loss = self.criterion(pred[:, 1], target[:, 1])\n",
|
" ty_loss = self.criterion(pred[:, 1], target[:, 1])\n",
|
||||||
"\n",
|
"\n",
|
||||||
" sin1_pred, cos1_pred = pred[:, 2], pred[:, 3]\n",
|
" sin_rx_pred = pred[:, 2]\n",
|
||||||
" sin2_pred, cos2_pred = pred[:, 4], pred[:, 5]\n",
|
" cos_rx_pred = pred[:, 3]\n",
|
||||||
" sin3_pred, cos3_pred = pred[:, 6], pred[:, 7]\n",
|
" sin_ry_pred = pred[:, 4]\n",
|
||||||
|
" cos_ry_pred = pred[:, 5]\n",
|
||||||
|
" sin_rz_pred = pred[:, 6]\n",
|
||||||
|
" cos_rz_pred = pred[:, 7]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" sin1_target = torch.sin(target[:, 2])\n",
|
" sin_rx_target = torch.sin(target[:, 2])\n",
|
||||||
" cos1_target = torch.cos(target[:, 2])\n",
|
" cos_rx_target = torch.cos(target[:, 2])\n",
|
||||||
" sin2_target = torch.sin(target[:, 3])\n",
|
" sin_ry_target = torch.sin(target[:, 3])\n",
|
||||||
" cos2_target = torch.cos(target[:, 3])\n",
|
" cos_ry_target = torch.cos(target[:, 3])\n",
|
||||||
" sin3_target = torch.sin(target[:, 4])\n",
|
" sin_rz_target = torch.sin(target[:, 4])\n",
|
||||||
" cos3_target = torch.cos(target[:, 4])\n",
|
" cos_rz_target = torch.cos(target[:, 4])\n",
|
||||||
"\n",
|
"\n",
|
||||||
" sin1_pred_t = torch.tanh(sin1_pred)\n",
|
" dot_rx = sin_rx_pred * sin_rx_target + cos_rx_pred * cos_rx_target\n",
|
||||||
" cos1_pred_t = torch.tanh(cos1_pred)\n",
|
" dot_ry = sin_ry_pred * sin_ry_target + cos_ry_pred * cos_ry_target\n",
|
||||||
" sin2_pred_t = torch.tanh(sin2_pred)\n",
|
" dot_rz = sin_rz_pred * sin_rz_target + cos_rz_pred * cos_rz_target\n",
|
||||||
" cos2_pred_t = torch.tanh(cos2_pred)\n",
|
"\n",
|
||||||
" sin3_pred_t = torch.tanh(sin3_pred)\n",
|
" rx_loss = (1 - dot_rx).mean()\n",
|
||||||
" cos3_pred_t = torch.tanh(cos3_pred)\n",
|
" ry_loss = (1 - dot_ry).mean()\n",
|
||||||
" \n",
|
" rz_loss = (1 - dot_rz).mean()\n",
|
||||||
" angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean()\n",
|
|
||||||
" angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean()\n",
|
|
||||||
" angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean()\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" scale_loss = self.criterion(pred[:, 8], target[:, 5])\n",
|
" scale_loss = self.criterion(pred[:, 8], target[:, 5])\n",
|
||||||
"\n",
|
"\n",
|
||||||
" total_loss = (\n",
|
" total_loss = (\n",
|
||||||
" self.trans_loss_weight * (tx_loss + ty_loss) +\n",
|
" self.trans_loss_weight * (tx_loss + ty_loss) +\n",
|
||||||
" self.angle_loss_weight * (angle1_loss + angle2_loss + angle3_loss) +\n",
|
" self.angle_loss_weight * (rx_loss + ry_loss + rz_loss) +\n",
|
||||||
" self.scale_loss_weight * scale_loss\n",
|
" self.scale_loss_weight * scale_loss\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return total_loss\n",
|
" return total_loss\n",
|
||||||
"\n",
|
"\n",
|
||||||
" def compute_mse_components(self, pred, target):\n",
|
" def compute_mse_components(self, pred, target):\n",
|
||||||
" tx_mse = self.criterion(pred[:, 0], target[:, 0]).item()\n",
|
" decoded = self.decode_output(pred)\n",
|
||||||
" ty_mse = self.criterion(pred[:, 1], target[:, 1]).item()\n",
|
" tx_mse = self.criterion(decoded[:, 0], target[:, 0]).item()\n",
|
||||||
|
" ty_mse = self.criterion(decoded[:, 1], target[:, 1]).item()\n",
|
||||||
"\n",
|
"\n",
|
||||||
" sin1_target = torch.sin(target[:, 2])\n",
|
" rx_mse = angular_difference(decoded[:, 2], target[:, 2]).item()\n",
|
||||||
" cos1_target = torch.cos(target[:, 2])\n",
|
" ry_mse = angular_difference(decoded[:, 3], target[:, 3]).item()\n",
|
||||||
" sin2_target = torch.sin(target[:, 3])\n",
|
" rz_mse = angular_difference(decoded[:, 4], target[:, 4]).item()\n",
|
||||||
" cos2_target = torch.cos(target[:, 3])\n",
|
|
||||||
" sin3_target = torch.sin(target[:, 4])\n",
|
|
||||||
" cos3_target = torch.cos(target[:, 4])\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" sin1_pred_t = torch.tanh(pred[:, 2])\n",
|
" scale_mse = self.criterion(decoded[:, 5], target[:, 5]).item()\n",
|
||||||
" cos1_pred_t = torch.tanh(pred[:, 3])\n",
|
|
||||||
" sin2_pred_t = torch.tanh(pred[:, 4])\n",
|
|
||||||
" cos2_pred_t = torch.tanh(pred[:, 5])\n",
|
|
||||||
" sin3_pred_t = torch.tanh(pred[:, 6])\n",
|
|
||||||
" cos3_pred_t = torch.tanh(pred[:, 7])\n",
|
|
||||||
" \n",
|
|
||||||
" angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean().item()\n",
|
|
||||||
" angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean().item()\n",
|
|
||||||
" angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean().item()\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" scale_mse = self.criterion(pred[:, 8], target[:, 5]).item()\n",
|
" avg_angle_loss = (rx_mse + ry_mse + rz_mse) / 3\n",
|
||||||
"\n",
|
|
||||||
" avg_angle_loss = (angle1_loss + angle2_loss + angle3_loss) / 3\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" return {\n",
|
" return {\n",
|
||||||
" 'trans': (tx_mse + ty_mse) / 2,\n",
|
" 'trans': (tx_mse + ty_mse) / 2,\n",
|
||||||
@@ -393,7 +408,7 @@
|
|||||||
" self.val_loader = val_loader\n",
|
" self.val_loader = val_loader\n",
|
||||||
" self.device = device\n",
|
" self.device = device\n",
|
||||||
" self.criterion = HomographyLoss6()\n",
|
" self.criterion = HomographyLoss6()\n",
|
||||||
" self.optimizer = optim.Adam(model.parameters(), lr=config[\"learning_rate\"])\n",
|
" self.optimizer = optim.Adam(model.parameters(), lr=config[\"learning_rate\"], weight_decay=1e-4)\n",
|
||||||
" self.writer = None\n",
|
" self.writer = None\n",
|
||||||
" self.best_val_loss = float(\"inf\")\n",
|
" self.best_val_loss = float(\"inf\")\n",
|
||||||
" self.train_losses = []\n",
|
" self.train_losses = []\n",
|
||||||
@@ -424,7 +439,8 @@
|
|||||||
" total_loss += loss.item() * google_img.size(0)\n",
|
" total_loss += loss.item() * google_img.size(0)\n",
|
||||||
" total_samples += google_img.size(0)\n",
|
" total_samples += google_img.size(0)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" mse_components = self.criterion.compute_mse_components(output, target)\n",
|
" decoded_output = self.model.decode_output(output)\n",
|
||||||
|
" mse_components = self.criterion.compute_mse_components(decoded_output, target)\n",
|
||||||
" mse_trans_sum += mse_components['trans'] * google_img.size(0)\n",
|
" mse_trans_sum += mse_components['trans'] * google_img.size(0)\n",
|
||||||
" mse_angle_sum += mse_components['angle'] * google_img.size(0)\n",
|
" mse_angle_sum += mse_components['angle'] * google_img.size(0)\n",
|
||||||
" mse_scale_sum += mse_components['scale'] * google_img.size(0)\n",
|
" mse_scale_sum += mse_components['scale'] * google_img.size(0)\n",
|
||||||
@@ -447,11 +463,12 @@
|
|||||||
" yandex_img = batch[\"yandex_img\"].to(self.device)\n",
|
" yandex_img = batch[\"yandex_img\"].to(self.device)\n",
|
||||||
" target = batch[\"homography_params\"].to(self.device)\n",
|
" target = batch[\"homography_params\"].to(self.device)\n",
|
||||||
" output = self.model(google_img, yandex_img)\n",
|
" output = self.model(google_img, yandex_img)\n",
|
||||||
|
" decoded_output = self.model.decode_output(output)\n",
|
||||||
" loss = self.criterion(output, target)\n",
|
" loss = self.criterion(output, target)\n",
|
||||||
" total_loss += loss.item() * google_img.size(0)\n",
|
" total_loss += loss.item() * google_img.size(0)\n",
|
||||||
" total_samples += google_img.size(0)\n",
|
" total_samples += google_img.size(0)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" mse_components = self.criterion.compute_mse_components(output, target)\n",
|
" mse_components = self.criterion.compute_mse_components(decoded_output, target)\n",
|
||||||
" mse_trans_sum += mse_components['trans'] * google_img.size(0)\n",
|
" mse_trans_sum += mse_components['trans'] * google_img.size(0)\n",
|
||||||
" mse_angle_sum += mse_components['angle'] * google_img.size(0)\n",
|
" mse_angle_sum += mse_components['angle'] * google_img.size(0)\n",
|
||||||
" mse_scale_sum += mse_components['scale'] * google_img.size(0)\n",
|
" mse_scale_sum += mse_components['scale'] * google_img.size(0)\n",
|
||||||
@@ -513,12 +530,6 @@
|
|||||||
"os.makedirs(IMG_DIR, exist_ok=True)\n",
|
"os.makedirs(IMG_DIR, exist_ok=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def angular_difference(pred_angles, target_angles):\n",
|
|
||||||
" diff = pred_angles - target_angles\n",
|
|
||||||
" diff = torch.atan2(torch.sin(diff), torch.cos(diff))\n",
|
|
||||||
" return torch.abs(diff)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def analyze_training(trainer):\n",
|
"def analyze_training(trainer):\n",
|
||||||
" print(\"=== Training Analysis ===\\n\")\n",
|
" print(\"=== Training Analysis ===\\n\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -538,37 +549,54 @@
|
|||||||
" n_samples = 50\n",
|
" n_samples = 50\n",
|
||||||
" names = [\"tx\", \"ty\", \"rx\", \"ry\", \"rz\", \"scale\"]\n",
|
" names = [\"tx\", \"ty\", \"rx\", \"ry\", \"rz\", \"scale\"]\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
" _, val_loader_for_analysis = create_data_loaders(\n",
|
||||||
|
" root_dir=config[\"data_dir\"],\n",
|
||||||
|
" batch_size=config[\"batch_size\"],\n",
|
||||||
|
" train_split=config[\"train_split\"],\n",
|
||||||
|
" num_workers=config[\"num_workers\"],\n",
|
||||||
|
" image_size=config[\"image_size\"],\n",
|
||||||
|
" augment_train=True,\n",
|
||||||
|
" cache_level=0,\n",
|
||||||
|
" )\n",
|
||||||
|
" \n",
|
||||||
" with torch.no_grad():\n",
|
" with torch.no_grad():\n",
|
||||||
" all_errors = [[] for _ in range(6)]\n",
|
" all_errors = [[] for _ in range(6)]\n",
|
||||||
" all_targets = [[] for _ in range(6)]\n",
|
" all_targets = [[] for _ in range(6)]\n",
|
||||||
" all_preds = [[] for _ in range(6)]\n",
|
" all_preds = [[] for _ in range(6)]\n",
|
||||||
" \n",
|
" \n",
|
||||||
" for i in range(n_samples):\n",
|
" sample_count = 0\n",
|
||||||
" try:\n",
|
" for batch in val_loader_for_analysis:\n",
|
||||||
" batch = next(iter(trainer.val_loader))\n",
|
" if sample_count >= n_samples:\n",
|
||||||
" except StopIteration:\n",
|
|
||||||
" break\n",
|
" break\n",
|
||||||
|
" \n",
|
||||||
" google_img = batch[\"google_img\"].to(trainer.device)\n",
|
" google_img = batch[\"google_img\"].to(trainer.device)\n",
|
||||||
" yandex_img = batch[\"yandex_img\"].to(trainer.device)\n",
|
" yandex_img = batch[\"yandex_img\"].to(trainer.device)\n",
|
||||||
" target_params = batch[\"homography_params\"].to(trainer.device)\n",
|
" target_params = batch[\"homography_params\"].to(trainer.device)\n",
|
||||||
" pred_params = trainer.model(google_img, yandex_img)\n",
|
" pred_params = trainer.model(google_img, yandex_img)\n",
|
||||||
" decoded_pred = trainer.model.decode_output(pred_params)\n",
|
" decoded_pred = trainer.model.decode_output(pred_params)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).item()\n",
|
" batch_size = google_img.size(0)\n",
|
||||||
" ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).item()\n",
|
" for i in range(batch_size):\n",
|
||||||
" rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).item()\n",
|
" if sample_count >= n_samples:\n",
|
||||||
" ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).item()\n",
|
" break\n",
|
||||||
" rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).item()\n",
|
" \n",
|
||||||
" scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).item()\n",
|
" tx_error = torch.abs(decoded_pred[i, 0] - target_params[i, 0]).item()\n",
|
||||||
" \n",
|
" ty_error = torch.abs(decoded_pred[i, 1] - target_params[i, 1]).item()\n",
|
||||||
" errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]\n",
|
" rx_error = angular_difference(decoded_pred[i, 2], target_params[i, 2]).item()\n",
|
||||||
" targets = target_params[0].cpu().numpy()\n",
|
" ry_error = angular_difference(decoded_pred[i, 3], target_params[i, 3]).item()\n",
|
||||||
" preds = decoded_pred[0].cpu().numpy()\n",
|
" rz_error = angular_difference(decoded_pred[i, 4], target_params[i, 4]).item()\n",
|
||||||
" \n",
|
" scale_error = torch.abs(decoded_pred[i, 5] - target_params[i, 5]).item()\n",
|
||||||
" for j in range(6):\n",
|
" \n",
|
||||||
" all_errors[j].append(errors[j])\n",
|
" errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]\n",
|
||||||
" all_targets[j].append(targets[j])\n",
|
" target_reordered = target_params[i].cpu().numpy()\n",
|
||||||
" all_preds[j].append(preds[j])\n",
|
" pred_reordered = decoded_pred[i].cpu().numpy()\n",
|
||||||
|
" \n",
|
||||||
|
" for j in range(6):\n",
|
||||||
|
" all_errors[j].append(errors[j])\n",
|
||||||
|
" all_targets[j].append(target_reordered[j])\n",
|
||||||
|
" all_preds[j].append(pred_reordered[j])\n",
|
||||||
|
" \n",
|
||||||
|
" sample_count += 1\n",
|
||||||
" \n",
|
" \n",
|
||||||
" mean_errors = [np.mean(all_errors[i]) for i in range(6)]\n",
|
" mean_errors = [np.mean(all_errors[i]) for i in range(6)]\n",
|
||||||
" std_errors = [np.std(all_errors[i]) for i in range(6)]\n",
|
" std_errors = [np.std(all_errors[i]) for i in range(6)]\n",
|
||||||
@@ -618,7 +646,7 @@
|
|||||||
" for j in range(6):\n",
|
" for j in range(6):\n",
|
||||||
" row = j // 3\n",
|
" row = j // 3\n",
|
||||||
" col = j % 3\n",
|
" col = j % 3\n",
|
||||||
" axes[row, col].bar(range(n_samples), all_errors[j], color=\"steelblue\", alpha=0.7)\n",
|
" axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color=\"steelblue\", alpha=0.7)\n",
|
||||||
" axes[row, col].set_xlabel(\"Sample\")\n",
|
" axes[row, col].set_xlabel(\"Sample\")\n",
|
||||||
" axes[row, col].set_ylabel(\"Absolute Error\")\n",
|
" axes[row, col].set_ylabel(\"Absolute Error\")\n",
|
||||||
" axes[row, col].set_title(f\"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}\")\n",
|
" axes[row, col].set_title(f\"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}\")\n",
|
||||||
@@ -657,63 +685,70 @@
|
|||||||
" n_vis_samples = 20\n",
|
" n_vis_samples = 20\n",
|
||||||
" \n",
|
" \n",
|
||||||
" with torch.no_grad():\n",
|
" with torch.no_grad():\n",
|
||||||
" for sample_idx in range(n_vis_samples):\n",
|
" vis_count = 0\n",
|
||||||
" try:\n",
|
" for batch in val_loader_for_analysis:\n",
|
||||||
" batch = next(iter(trainer.val_loader))\n",
|
" if vis_count >= n_vis_samples:\n",
|
||||||
" except StopIteration:\n",
|
|
||||||
" break\n",
|
" break\n",
|
||||||
" google_img = batch[\"google_img\"].to(trainer.device)\n",
|
" batch_size = batch[\"google_img\"].size(0)\n",
|
||||||
" yandex_img = batch[\"yandex_img\"].to(trainer.device)\n",
|
|
||||||
" target_params = batch[\"homography_params\"].to(trainer.device)\n",
|
|
||||||
" pred_params = trainer.model(google_img, yandex_img)\n",
|
|
||||||
" decoded_pred = trainer.model.decode_output(pred_params)\n",
|
|
||||||
" \n",
|
" \n",
|
||||||
" tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).cpu().numpy()\n",
|
" for i in range(batch_size):\n",
|
||||||
" ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).cpu().numpy()\n",
|
" if vis_count >= n_vis_samples:\n",
|
||||||
" rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).cpu().numpy()\n",
|
" break\n",
|
||||||
" ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).cpu().numpy()\n",
|
" \n",
|
||||||
" rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).cpu().numpy()\n",
|
" google_img = batch[\"google_img\"][i:i+1].to(trainer.device)\n",
|
||||||
" scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).cpu().numpy()\n",
|
" yandex_img = batch[\"yandex_img\"][i:i+1].to(trainer.device)\n",
|
||||||
" \n",
|
" target_params = batch[\"homography_params\"][i:i+1].to(trainer.device)\n",
|
||||||
" errors = np.array([tx_error[0], ty_error[0], rx_error[0], ry_error[0], rz_error[0], scale_error[0]])\n",
|
" pred_params = trainer.model(google_img, yandex_img)\n",
|
||||||
" targets = target_params[0].cpu().numpy()\n",
|
" decoded_pred = trainer.model.decode_output(pred_params)\n",
|
||||||
" preds = decoded_pred[0].cpu().numpy()\n",
|
" \n",
|
||||||
" \n",
|
" tx_error = torch.abs(decoded_pred[0, 0] - target_params[0, 0]).item()\n",
|
||||||
" fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n",
|
" ty_error = torch.abs(decoded_pred[0, 1] - target_params[0, 1]).item()\n",
|
||||||
" \n",
|
" rx_error = angular_difference(decoded_pred[0, 2], target_params[0, 2]).item()\n",
|
||||||
" axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))\n",
|
" ry_error = angular_difference(decoded_pred[0, 3], target_params[0, 3]).item()\n",
|
||||||
" axes[0, 0].set_title(f\"Google Image\")\n",
|
" rz_error = angular_difference(decoded_pred[0, 4], target_params[0, 4]).item()\n",
|
||||||
" axes[0, 0].axis(\"off\")\n",
|
" scale_error = torch.abs(decoded_pred[0, 5] - target_params[0, 5]).item()\n",
|
||||||
" \n",
|
" \n",
|
||||||
" axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))\n",
|
" errors = np.array([tx_error, ty_error, rx_error, ry_error, rz_error, scale_error])\n",
|
||||||
" axes[0, 1].set_title(f\"Yandex Image\")\n",
|
" targets = target_params[0].cpu().numpy()\n",
|
||||||
" axes[0, 1].axis(\"off\")\n",
|
" preds = decoded_pred[0].cpu().numpy()\n",
|
||||||
" \n",
|
" \n",
|
||||||
" x_pos = np.arange(6)\n",
|
" fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n",
|
||||||
" width = 0.35\n",
|
" \n",
|
||||||
" axes[1, 0].bar(x_pos - width/2, targets, width, label=\"Target\", color=\"steelblue\", alpha=0.8)\n",
|
" axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))\n",
|
||||||
" axes[1, 0].bar(x_pos + width/2, preds, width, label=\"Predicted\", color=\"coral\", alpha=0.8)\n",
|
" axes[0, 0].set_title(f\"Google Image\")\n",
|
||||||
" axes[1, 0].set_xticks(x_pos)\n",
|
" axes[0, 0].axis(\"off\")\n",
|
||||||
" axes[1, 0].set_xticklabels(names)\n",
|
" \n",
|
||||||
" axes[1, 0].set_ylabel(\"Parameter Value\")\n",
|
" axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))\n",
|
||||||
" axes[1, 0].set_title(\"Target vs Predicted\")\n",
|
" axes[0, 1].set_title(f\"Yandex Image\")\n",
|
||||||
" axes[1, 0].legend()\n",
|
" axes[0, 1].axis(\"off\")\n",
|
||||||
" axes[1, 0].grid(True, alpha=0.3, axis=\"y\")\n",
|
" \n",
|
||||||
" \n",
|
" x_pos = np.arange(6)\n",
|
||||||
" axes[1, 1].bar(x_pos, errors, color=[\"c\", \"m\", \"y\", \"g\", \"b\", \"r\"], alpha=0.8)\n",
|
" width = 0.35\n",
|
||||||
" axes[1, 1].set_xticks(x_pos)\n",
|
" axes[1, 0].bar(x_pos - width/2, targets, width, label=\"Target\", color=\"steelblue\", alpha=0.8)\n",
|
||||||
" axes[1, 1].set_xticklabels(names)\n",
|
" axes[1, 0].bar(x_pos + width/2, preds, width, label=\"Predicted\", color=\"coral\", alpha=0.8)\n",
|
||||||
" axes[1, 1].set_ylabel(\"Absolute Error\")\n",
|
" axes[1, 0].set_xticks(x_pos)\n",
|
||||||
" axes[1, 1].set_title(f\"Prediction Error (Mean: {np.mean(errors):.4f})\")\n",
|
" axes[1, 0].set_xticklabels(names)\n",
|
||||||
" axes[1, 1].grid(True, alpha=0.3, axis=\"y\")\n",
|
" axes[1, 0].set_ylabel(\"Parameter Value\")\n",
|
||||||
" for i, e in enumerate(errors):\n",
|
" axes[1, 0].set_title(\"Target vs Predicted\")\n",
|
||||||
" axes[1, 1].text(i, e + 0.01, f\"{e:.3f}\", ha=\"center\", va=\"bottom\", fontsize=8)\n",
|
" axes[1, 0].legend()\n",
|
||||||
" \n",
|
" axes[1, 0].grid(True, alpha=0.3, axis=\"y\")\n",
|
||||||
" plt.suptitle(f\"Sample {sample_idx + 1}\", fontsize=14)\n",
|
" \n",
|
||||||
" plt.tight_layout()\n",
|
" axes[1, 1].bar(x_pos, errors, color=[\"c\", \"m\", \"y\", \"g\", \"b\", \"r\"], alpha=0.8)\n",
|
||||||
" plt.savefig(os.path.join(IMG_DIR, f\"prediction_sample_{sample_idx + 1:02d}.png\"), dpi=100)\n",
|
" axes[1, 1].set_xticks(x_pos)\n",
|
||||||
" plt.show()\n",
|
" axes[1, 1].set_xticklabels(names)\n",
|
||||||
" print(f\"Saved prediction_sample_{sample_idx + 1:02d}.png\")\n",
|
" axes[1, 1].set_ylabel(\"Absolute Error\")\n",
|
||||||
|
" axes[1, 1].set_title(f\"Prediction Error (Mean: {np.mean(errors):.4f})\")\n",
|
||||||
|
" axes[1, 1].grid(True, alpha=0.3, axis=\"y\")\n",
|
||||||
|
" for i_e, e in enumerate(errors):\n",
|
||||||
|
" axes[1, 1].text(i_e, e + 0.01, f\"{e:.3f}\", ha=\"center\", va=\"bottom\", fontsize=8)\n",
|
||||||
|
" \n",
|
||||||
|
" plt.suptitle(f\"Sample {vis_count + 1}\", fontsize=14)\n",
|
||||||
|
" plt.tight_layout()\n",
|
||||||
|
" plt.savefig(os.path.join(IMG_DIR, f\"prediction_sample_{vis_count + 1:02d}.png\"), dpi=100)\n",
|
||||||
|
" plt.show()\n",
|
||||||
|
" print(f\"Saved prediction_sample_{vis_count + 1:02d}.png\")\n",
|
||||||
|
" \n",
|
||||||
|
" vis_count += 1\n",
|
||||||
" \n",
|
" \n",
|
||||||
" print(f\"\\nPrediction errors over {n_samples} samples:\")\n",
|
" print(f\"\\nPrediction errors over {n_samples} samples:\")\n",
|
||||||
" print(f\"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}\")\n",
|
" print(f\"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}\")\n",
|
||||||
|
|||||||
@@ -3,18 +3,14 @@ import torch
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from .dataloader import create_data_loaders
|
||||||
|
from .model import angular_difference
|
||||||
from .utils import config
|
from .utils import config
|
||||||
|
|
||||||
IMG_DIR = os.path.join(config["output_dir"], "images")
|
IMG_DIR = os.path.join(config["output_dir"], "images")
|
||||||
os.makedirs(IMG_DIR, exist_ok=True)
|
os.makedirs(IMG_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def angular_difference(pred_angles, target_angles):
|
|
||||||
diff = pred_angles - target_angles
|
|
||||||
diff = torch.atan2(torch.sin(diff), torch.cos(diff))
|
|
||||||
return torch.abs(diff)
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_training(trainer):
|
def analyze_training(trainer):
|
||||||
print("=== Training Analysis ===\n")
|
print("=== Training Analysis ===\n")
|
||||||
|
|
||||||
@@ -34,37 +30,54 @@ def analyze_training(trainer):
|
|||||||
n_samples = 50
|
n_samples = 50
|
||||||
names = ["tx", "ty", "rx", "ry", "rz", "scale"]
|
names = ["tx", "ty", "rx", "ry", "rz", "scale"]
|
||||||
|
|
||||||
|
_, val_loader_for_analysis = create_data_loaders(
|
||||||
|
root_dir=config["data_dir"],
|
||||||
|
batch_size=config["batch_size"],
|
||||||
|
train_split=config["train_split"],
|
||||||
|
num_workers=config["num_workers"],
|
||||||
|
image_size=config["image_size"],
|
||||||
|
augment_train=True,
|
||||||
|
cache_level=0,
|
||||||
|
)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
all_errors = [[] for _ in range(6)]
|
all_errors = [[] for _ in range(6)]
|
||||||
all_targets = [[] for _ in range(6)]
|
all_targets = [[] for _ in range(6)]
|
||||||
all_preds = [[] for _ in range(6)]
|
all_preds = [[] for _ in range(6)]
|
||||||
|
|
||||||
for i in range(n_samples):
|
sample_count = 0
|
||||||
try:
|
for batch in val_loader_for_analysis:
|
||||||
batch = next(iter(trainer.val_loader))
|
if sample_count >= n_samples:
|
||||||
except StopIteration:
|
|
||||||
break
|
break
|
||||||
|
|
||||||
google_img = batch["google_img"].to(trainer.device)
|
google_img = batch["google_img"].to(trainer.device)
|
||||||
yandex_img = batch["yandex_img"].to(trainer.device)
|
yandex_img = batch["yandex_img"].to(trainer.device)
|
||||||
target_params = batch["homography_params"].to(trainer.device)
|
target_params = batch["homography_params"].to(trainer.device)
|
||||||
pred_params = trainer.model(google_img, yandex_img)
|
pred_params = trainer.model(google_img, yandex_img)
|
||||||
decoded_pred = trainer.model.decode_output(pred_params)
|
decoded_pred = trainer.model.decode_output(pred_params)
|
||||||
|
|
||||||
tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).item()
|
batch_size = google_img.size(0)
|
||||||
ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).item()
|
for i in range(batch_size):
|
||||||
rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).item()
|
if sample_count >= n_samples:
|
||||||
ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).item()
|
break
|
||||||
rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).item()
|
|
||||||
scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).item()
|
tx_error = torch.abs(decoded_pred[i, 0] - target_params[i, 0]).item()
|
||||||
|
ty_error = torch.abs(decoded_pred[i, 1] - target_params[i, 1]).item()
|
||||||
errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]
|
rx_error = angular_difference(decoded_pred[i, 2], target_params[i, 2]).item()
|
||||||
targets = target_params[0].cpu().numpy()
|
ry_error = angular_difference(decoded_pred[i, 3], target_params[i, 3]).item()
|
||||||
preds = decoded_pred[0].cpu().numpy()
|
rz_error = angular_difference(decoded_pred[i, 4], target_params[i, 4]).item()
|
||||||
|
scale_error = torch.abs(decoded_pred[i, 5] - target_params[i, 5]).item()
|
||||||
for j in range(6):
|
|
||||||
all_errors[j].append(errors[j])
|
errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]
|
||||||
all_targets[j].append(targets[j])
|
target_reordered = target_params[i].cpu().numpy()
|
||||||
all_preds[j].append(preds[j])
|
pred_reordered = decoded_pred[i].cpu().numpy()
|
||||||
|
|
||||||
|
for j in range(6):
|
||||||
|
all_errors[j].append(errors[j])
|
||||||
|
all_targets[j].append(target_reordered[j])
|
||||||
|
all_preds[j].append(pred_reordered[j])
|
||||||
|
|
||||||
|
sample_count += 1
|
||||||
|
|
||||||
mean_errors = [np.mean(all_errors[i]) for i in range(6)]
|
mean_errors = [np.mean(all_errors[i]) for i in range(6)]
|
||||||
std_errors = [np.std(all_errors[i]) for i in range(6)]
|
std_errors = [np.std(all_errors[i]) for i in range(6)]
|
||||||
@@ -114,7 +127,7 @@ def analyze_training(trainer):
|
|||||||
for j in range(6):
|
for j in range(6):
|
||||||
row = j // 3
|
row = j // 3
|
||||||
col = j % 3
|
col = j % 3
|
||||||
axes[row, col].bar(range(n_samples), all_errors[j], color="steelblue", alpha=0.7)
|
axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color="steelblue", alpha=0.7)
|
||||||
axes[row, col].set_xlabel("Sample")
|
axes[row, col].set_xlabel("Sample")
|
||||||
axes[row, col].set_ylabel("Absolute Error")
|
axes[row, col].set_ylabel("Absolute Error")
|
||||||
axes[row, col].set_title(f"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}")
|
axes[row, col].set_title(f"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}")
|
||||||
@@ -153,63 +166,70 @@ def analyze_training(trainer):
|
|||||||
n_vis_samples = 20
|
n_vis_samples = 20
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
for sample_idx in range(n_vis_samples):
|
vis_count = 0
|
||||||
try:
|
for batch in val_loader_for_analysis:
|
||||||
batch = next(iter(trainer.val_loader))
|
if vis_count >= n_vis_samples:
|
||||||
except StopIteration:
|
|
||||||
break
|
break
|
||||||
google_img = batch["google_img"].to(trainer.device)
|
batch_size = batch["google_img"].size(0)
|
||||||
yandex_img = batch["yandex_img"].to(trainer.device)
|
|
||||||
target_params = batch["homography_params"].to(trainer.device)
|
|
||||||
pred_params = trainer.model(google_img, yandex_img)
|
|
||||||
decoded_pred = trainer.model.decode_output(pred_params)
|
|
||||||
|
|
||||||
tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).cpu().numpy()
|
for i in range(batch_size):
|
||||||
ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).cpu().numpy()
|
if vis_count >= n_vis_samples:
|
||||||
rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).cpu().numpy()
|
break
|
||||||
ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).cpu().numpy()
|
|
||||||
rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).cpu().numpy()
|
google_img = batch["google_img"][i:i+1].to(trainer.device)
|
||||||
scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).cpu().numpy()
|
yandex_img = batch["yandex_img"][i:i+1].to(trainer.device)
|
||||||
|
target_params = batch["homography_params"][i:i+1].to(trainer.device)
|
||||||
errors = np.array([tx_error[0], ty_error[0], rx_error[0], ry_error[0], rz_error[0], scale_error[0]])
|
pred_params = trainer.model(google_img, yandex_img)
|
||||||
targets = target_params[0].cpu().numpy()
|
decoded_pred = trainer.model.decode_output(pred_params)
|
||||||
preds = decoded_pred[0].cpu().numpy()
|
|
||||||
|
tx_error = torch.abs(decoded_pred[0, 0] - target_params[0, 0]).item()
|
||||||
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
ty_error = torch.abs(decoded_pred[0, 1] - target_params[0, 1]).item()
|
||||||
|
rx_error = angular_difference(decoded_pred[0, 2], target_params[0, 2]).item()
|
||||||
axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))
|
ry_error = angular_difference(decoded_pred[0, 3], target_params[0, 3]).item()
|
||||||
axes[0, 0].set_title(f"Google Image")
|
rz_error = angular_difference(decoded_pred[0, 4], target_params[0, 4]).item()
|
||||||
axes[0, 0].axis("off")
|
scale_error = torch.abs(decoded_pred[0, 5] - target_params[0, 5]).item()
|
||||||
|
|
||||||
axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))
|
errors = np.array([tx_error, ty_error, rx_error, ry_error, rz_error, scale_error])
|
||||||
axes[0, 1].set_title(f"Yandex Image")
|
targets = target_params[0].cpu().numpy()
|
||||||
axes[0, 1].axis("off")
|
preds = decoded_pred[0].cpu().numpy()
|
||||||
|
|
||||||
x_pos = np.arange(6)
|
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
||||||
width = 0.35
|
|
||||||
axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="steelblue", alpha=0.8)
|
axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))
|
||||||
axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="coral", alpha=0.8)
|
axes[0, 0].set_title(f"Google Image")
|
||||||
axes[1, 0].set_xticks(x_pos)
|
axes[0, 0].axis("off")
|
||||||
axes[1, 0].set_xticklabels(names)
|
|
||||||
axes[1, 0].set_ylabel("Parameter Value")
|
axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))
|
||||||
axes[1, 0].set_title("Target vs Predicted")
|
axes[0, 1].set_title(f"Yandex Image")
|
||||||
axes[1, 0].legend()
|
axes[0, 1].axis("off")
|
||||||
axes[1, 0].grid(True, alpha=0.3, axis="y")
|
|
||||||
|
x_pos = np.arange(6)
|
||||||
axes[1, 1].bar(x_pos, errors, color=["c", "m", "y", "g", "b", "r"], alpha=0.8)
|
width = 0.35
|
||||||
axes[1, 1].set_xticks(x_pos)
|
axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="steelblue", alpha=0.8)
|
||||||
axes[1, 1].set_xticklabels(names)
|
axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="coral", alpha=0.8)
|
||||||
axes[1, 1].set_ylabel("Absolute Error")
|
axes[1, 0].set_xticks(x_pos)
|
||||||
axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})")
|
axes[1, 0].set_xticklabels(names)
|
||||||
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
axes[1, 0].set_ylabel("Parameter Value")
|
||||||
for i, e in enumerate(errors):
|
axes[1, 0].set_title("Target vs Predicted")
|
||||||
axes[1, 1].text(i, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=8)
|
axes[1, 0].legend()
|
||||||
|
axes[1, 0].grid(True, alpha=0.3, axis="y")
|
||||||
plt.suptitle(f"Sample {sample_idx + 1}", fontsize=14)
|
|
||||||
plt.tight_layout()
|
axes[1, 1].bar(x_pos, errors, color=["c", "m", "y", "g", "b", "r"], alpha=0.8)
|
||||||
plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{sample_idx + 1:02d}.png"), dpi=100)
|
axes[1, 1].set_xticks(x_pos)
|
||||||
plt.show()
|
axes[1, 1].set_xticklabels(names)
|
||||||
print(f"Saved prediction_sample_{sample_idx + 1:02d}.png")
|
axes[1, 1].set_ylabel("Absolute Error")
|
||||||
|
axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})")
|
||||||
|
axes[1, 1].grid(True, alpha=0.3, axis="y")
|
||||||
|
for i_e, e in enumerate(errors):
|
||||||
|
axes[1, 1].text(i_e, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=8)
|
||||||
|
|
||||||
|
plt.suptitle(f"Sample {vis_count + 1}", fontsize=14)
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{vis_count + 1:02d}.png"), dpi=100)
|
||||||
|
plt.show()
|
||||||
|
print(f"Saved prediction_sample_{vis_count + 1:02d}.png")
|
||||||
|
|
||||||
|
vis_count += 1
|
||||||
|
|
||||||
print(f"\nPrediction errors over {n_samples} samples:")
|
print(f"\nPrediction errors over {n_samples} samples:")
|
||||||
print(f"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}")
|
print(f"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}")
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ class YaGoDataset(Dataset):
|
|||||||
else:
|
else:
|
||||||
google_img = self._google_images[idx]
|
google_img = self._google_images[idx]
|
||||||
yandex_img = self._yandex_images[idx]
|
yandex_img = self._yandex_images[idx]
|
||||||
target_params = np.zeros(6, dtype=np.float32)
|
target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32)
|
||||||
target_matrix = np.eye(3, dtype=np.float32)
|
target_matrix = np.eye(3, dtype=np.float32)
|
||||||
|
|
||||||
google_img = Image.fromarray(google_img)
|
google_img = Image.fromarray(google_img)
|
||||||
|
|||||||
@@ -3,6 +3,12 @@ import torch.nn as nn
|
|||||||
from torchvision import models
|
from torchvision import models
|
||||||
|
|
||||||
|
|
||||||
|
def angular_difference(pred_angles, target_angles):
|
||||||
|
diff = pred_angles - target_angles
|
||||||
|
diff = torch.atan2(torch.sin(diff), torch.cos(diff))
|
||||||
|
return torch.abs(diff)
|
||||||
|
|
||||||
|
|
||||||
class HomographyCNN6(nn.Module):
|
class HomographyCNN6(nn.Module):
|
||||||
def __init__(self, input_channels=3, backbone_name="resnet18", pretrained=True, dropout_rate=0.3):
|
def __init__(self, input_channels=3, backbone_name="resnet18", pretrained=True, dropout_rate=0.3):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -15,31 +21,53 @@ class HomographyCNN6(nn.Module):
|
|||||||
nn.Linear(self.feature_dim * 4, 512),
|
nn.Linear(self.feature_dim * 4, 512),
|
||||||
nn.ReLU(inplace=True),
|
nn.ReLU(inplace=True),
|
||||||
nn.Dropout(dropout_rate),
|
nn.Dropout(dropout_rate),
|
||||||
|
nn.Linear(1024, 512),
|
||||||
|
nn.ReLU(inplace=True),
|
||||||
|
nn.Dropout(dropout_rate),
|
||||||
nn.Linear(512, 256),
|
nn.Linear(512, 256),
|
||||||
nn.ReLU(inplace=True),
|
nn.ReLU(inplace=True),
|
||||||
nn.Dropout(dropout_rate),
|
nn.Dropout(dropout_rate),
|
||||||
nn.Linear(256, 9),
|
nn.Linear(512, 9),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _normalize_sin_cos(self, _sin, _cos):
|
||||||
|
_len = torch.sqrt(_sin ** 2 + _cos ** 2)
|
||||||
|
return _sin / _len, _cos / _len
|
||||||
|
|
||||||
def forward(self, img1, img2):
|
def forward(self, img1, img2):
|
||||||
f1 = self.backbone(img1)
|
f1 = self.backbone(img1)
|
||||||
f2 = self.backbone(img2)
|
f2 = self.backbone(img2)
|
||||||
combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)
|
combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)
|
||||||
|
|
||||||
|
combined[:, (0, 1)] = torch.tanh(combined[:, (0, 1)]) * 10 # [-10; 10]
|
||||||
|
combined[:, (2, 3)] = self._normalize_sin_cos(torch.tanh(combined[:, 2]), torch.tanh(combined[:, 3]))
|
||||||
|
combined[:, (4, 5)] = self._normalize_sin_cos(torch.tanh(combined[:, 4]), torch.tanh(combined[:, 5]))
|
||||||
|
combined[:, (6, 7)] = self._normalize_sin_cos(torch.tanh(combined[:, 6]), torch.tanh(combined[:, 7]))
|
||||||
|
|
||||||
return self.head(combined)
|
return self.head(combined)
|
||||||
|
|
||||||
def decode_output(self, output):
|
def decode_output(self, output):
|
||||||
tx, ty = output[:, 0], output[:, 1]
|
tx = output[:, 0]
|
||||||
sin1, cos1 = torch.tanh(output[:, 2]), torch.tanh(output[:, 3])
|
ty = output[:, 1]
|
||||||
sin2, cos2 = torch.tanh(output[:, 4]), torch.tanh(output[:, 5])
|
|
||||||
sin3, cos3 = torch.tanh(output[:, 6]), torch.tanh(output[:, 7])
|
|
||||||
scale = output[:, 8]
|
scale = output[:, 8]
|
||||||
|
|
||||||
angle1 = torch.atan2(sin1, cos1)
|
angle1 = torch.atan2(output[:, 2], output[:, 3])
|
||||||
angle2 = torch.atan2(sin2, cos2)
|
angle2 = torch.atan2(output[:, 4], output[:, 5])
|
||||||
angle3 = torch.atan2(sin3, cos3)
|
angle3 = torch.atan2(output[:, 6], output[:, 7])
|
||||||
|
|
||||||
return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1)
|
return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1)
|
||||||
|
|
||||||
|
def get_components(self, output):
|
||||||
|
decoded = self.decode_output(output)
|
||||||
|
return {
|
||||||
|
"tx": decoded[:, 0],
|
||||||
|
"ty": decoded[:, 1],
|
||||||
|
"rx": decoded[:, 2],
|
||||||
|
"ry": decoded[:, 3],
|
||||||
|
"rz": decoded[:, 4],
|
||||||
|
"scale": decoded[:, 5],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class HomographyLoss6(nn.Module):
|
class HomographyLoss6(nn.Module):
|
||||||
def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0):
|
def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0):
|
||||||
@@ -53,63 +81,50 @@ class HomographyLoss6(nn.Module):
|
|||||||
tx_loss = self.criterion(pred[:, 0], target[:, 0])
|
tx_loss = self.criterion(pred[:, 0], target[:, 0])
|
||||||
ty_loss = self.criterion(pred[:, 1], target[:, 1])
|
ty_loss = self.criterion(pred[:, 1], target[:, 1])
|
||||||
|
|
||||||
sin1_pred, cos1_pred = pred[:, 2], pred[:, 3]
|
sin_rx_pred = pred[:, 2]
|
||||||
sin2_pred, cos2_pred = pred[:, 4], pred[:, 5]
|
cos_rx_pred = pred[:, 3]
|
||||||
sin3_pred, cos3_pred = pred[:, 6], pred[:, 7]
|
sin_ry_pred = pred[:, 4]
|
||||||
|
cos_ry_pred = pred[:, 5]
|
||||||
|
sin_rz_pred = pred[:, 6]
|
||||||
|
cos_rz_pred = pred[:, 7]
|
||||||
|
|
||||||
sin1_target = torch.sin(target[:, 2])
|
sin_rx_target = torch.sin(target[:, 2])
|
||||||
cos1_target = torch.cos(target[:, 2])
|
cos_rx_target = torch.cos(target[:, 2])
|
||||||
sin2_target = torch.sin(target[:, 3])
|
sin_ry_target = torch.sin(target[:, 3])
|
||||||
cos2_target = torch.cos(target[:, 3])
|
cos_ry_target = torch.cos(target[:, 3])
|
||||||
sin3_target = torch.sin(target[:, 4])
|
sin_rz_target = torch.sin(target[:, 4])
|
||||||
cos3_target = torch.cos(target[:, 4])
|
cos_rz_target = torch.cos(target[:, 4])
|
||||||
|
|
||||||
sin1_pred_t = torch.tanh(sin1_pred)
|
dot_rx = sin_rx_pred * sin_rx_target + cos_rx_pred * cos_rx_target
|
||||||
cos1_pred_t = torch.tanh(cos1_pred)
|
dot_ry = sin_ry_pred * sin_ry_target + cos_ry_pred * cos_ry_target
|
||||||
sin2_pred_t = torch.tanh(sin2_pred)
|
dot_rz = sin_rz_pred * sin_rz_target + cos_rz_pred * cos_rz_target
|
||||||
cos2_pred_t = torch.tanh(cos2_pred)
|
|
||||||
sin3_pred_t = torch.tanh(sin3_pred)
|
rx_loss = (1 - dot_rx).mean()
|
||||||
cos3_pred_t = torch.tanh(cos3_pred)
|
ry_loss = (1 - dot_ry).mean()
|
||||||
|
rz_loss = (1 - dot_rz).mean()
|
||||||
angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean()
|
|
||||||
angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean()
|
|
||||||
angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean()
|
|
||||||
|
|
||||||
scale_loss = self.criterion(pred[:, 8], target[:, 5])
|
scale_loss = self.criterion(pred[:, 8], target[:, 5])
|
||||||
|
|
||||||
total_loss = (
|
total_loss = (
|
||||||
self.trans_loss_weight * (tx_loss + ty_loss) +
|
self.trans_loss_weight * (tx_loss + ty_loss) +
|
||||||
self.angle_loss_weight * (angle1_loss + angle2_loss + angle3_loss) +
|
self.angle_loss_weight * (rx_loss + ry_loss + rz_loss) +
|
||||||
self.scale_loss_weight * scale_loss
|
self.scale_loss_weight * scale_loss
|
||||||
)
|
)
|
||||||
|
|
||||||
return total_loss
|
return total_loss
|
||||||
|
|
||||||
def compute_mse_components(self, pred, target):
|
def compute_mse_components(self, pred, target):
|
||||||
tx_mse = self.criterion(pred[:, 0], target[:, 0]).item()
|
decoded = self.decode_output(pred)
|
||||||
ty_mse = self.criterion(pred[:, 1], target[:, 1]).item()
|
tx_mse = self.criterion(decoded[:, 0], target[:, 0]).item()
|
||||||
|
ty_mse = self.criterion(decoded[:, 1], target[:, 1]).item()
|
||||||
|
|
||||||
sin1_target = torch.sin(target[:, 2])
|
rx_mse = angular_difference(decoded[:, 2], target[:, 2]).item()
|
||||||
cos1_target = torch.cos(target[:, 2])
|
ry_mse = angular_difference(decoded[:, 3], target[:, 3]).item()
|
||||||
sin2_target = torch.sin(target[:, 3])
|
rz_mse = angular_difference(decoded[:, 4], target[:, 4]).item()
|
||||||
cos2_target = torch.cos(target[:, 3])
|
|
||||||
sin3_target = torch.sin(target[:, 4])
|
|
||||||
cos3_target = torch.cos(target[:, 4])
|
|
||||||
|
|
||||||
sin1_pred_t = torch.tanh(pred[:, 2])
|
scale_mse = self.criterion(decoded[:, 5], target[:, 5]).item()
|
||||||
cos1_pred_t = torch.tanh(pred[:, 3])
|
|
||||||
sin2_pred_t = torch.tanh(pred[:, 4])
|
|
||||||
cos2_pred_t = torch.tanh(pred[:, 5])
|
|
||||||
sin3_pred_t = torch.tanh(pred[:, 6])
|
|
||||||
cos3_pred_t = torch.tanh(pred[:, 7])
|
|
||||||
|
|
||||||
angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean().item()
|
|
||||||
angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean().item()
|
|
||||||
angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean().item()
|
|
||||||
|
|
||||||
scale_mse = self.criterion(pred[:, 8], target[:, 5]).item()
|
avg_angle_loss = (rx_mse + ry_mse + rz_mse) / 3
|
||||||
|
|
||||||
avg_angle_loss = (angle1_loss + angle2_loss + angle3_loss) / 3
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'trans': (tx_mse + ty_mse) / 2,
|
'trans': (tx_mse + ty_mse) / 2,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ class HomographyTrainer:
|
|||||||
self.val_loader = val_loader
|
self.val_loader = val_loader
|
||||||
self.device = device
|
self.device = device
|
||||||
self.criterion = HomographyLoss6()
|
self.criterion = HomographyLoss6()
|
||||||
self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
|
self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"], weight_decay=1e-4)
|
||||||
self.writer = None
|
self.writer = None
|
||||||
self.best_val_loss = float("inf")
|
self.best_val_loss = float("inf")
|
||||||
self.train_losses = []
|
self.train_losses = []
|
||||||
@@ -49,7 +49,8 @@ class HomographyTrainer:
|
|||||||
total_loss += loss.item() * google_img.size(0)
|
total_loss += loss.item() * google_img.size(0)
|
||||||
total_samples += google_img.size(0)
|
total_samples += google_img.size(0)
|
||||||
|
|
||||||
mse_components = self.criterion.compute_mse_components(output, target)
|
decoded_output = self.model.decode_output(output)
|
||||||
|
mse_components = self.criterion.compute_mse_components(decoded_output, target)
|
||||||
mse_trans_sum += mse_components['trans'] * google_img.size(0)
|
mse_trans_sum += mse_components['trans'] * google_img.size(0)
|
||||||
mse_angle_sum += mse_components['angle'] * google_img.size(0)
|
mse_angle_sum += mse_components['angle'] * google_img.size(0)
|
||||||
mse_scale_sum += mse_components['scale'] * google_img.size(0)
|
mse_scale_sum += mse_components['scale'] * google_img.size(0)
|
||||||
@@ -72,11 +73,12 @@ class HomographyTrainer:
|
|||||||
yandex_img = batch["yandex_img"].to(self.device)
|
yandex_img = batch["yandex_img"].to(self.device)
|
||||||
target = batch["homography_params"].to(self.device)
|
target = batch["homography_params"].to(self.device)
|
||||||
output = self.model(google_img, yandex_img)
|
output = self.model(google_img, yandex_img)
|
||||||
|
decoded_output = self.model.decode_output(output)
|
||||||
loss = self.criterion(output, target)
|
loss = self.criterion(output, target)
|
||||||
total_loss += loss.item() * google_img.size(0)
|
total_loss += loss.item() * google_img.size(0)
|
||||||
total_samples += google_img.size(0)
|
total_samples += google_img.size(0)
|
||||||
|
|
||||||
mse_components = self.criterion.compute_mse_components(output, target)
|
mse_components = self.criterion.compute_mse_components(decoded_output, target)
|
||||||
mse_trans_sum += mse_components['trans'] * google_img.size(0)
|
mse_trans_sum += mse_components['trans'] * google_img.size(0)
|
||||||
mse_angle_sum += mse_components['angle'] * google_img.size(0)
|
mse_angle_sum += mse_components['angle'] * google_img.size(0)
|
||||||
mse_scale_sum += mse_components['scale'] * google_img.size(0)
|
mse_scale_sum += mse_components['scale'] * google_img.size(0)
|
||||||
|
|||||||
@@ -7,9 +7,9 @@ config = {
|
|||||||
"batch_size": 32,
|
"batch_size": 32,
|
||||||
"train_split": 0.8,
|
"train_split": 0.8,
|
||||||
"num_workers": 0,
|
"num_workers": 0,
|
||||||
"epochs": 100,
|
"epochs": 10,
|
||||||
"learning_rate": 2e-4,
|
"learning_rate": 2e-4,
|
||||||
"dropout_rate": 0.3,
|
"dropout_rate": 0.5,
|
||||||
"backbone": "resnet18",
|
"backbone": "resnet18",
|
||||||
"output_dir": r"C:\Users\admin\Projects\autopilot\models\SiaN\runs",
|
"output_dir": r"C:\Users\admin\Projects\autopilot\models\SiaN\runs",
|
||||||
"save_every_n_epochs": 15,
|
"save_every_n_epochs": 15,
|
||||||
@@ -27,11 +27,11 @@ def generate_random_homography_params(angle_range=10, translation_range=0.1, sca
|
|||||||
rx = np.radians(np.random.uniform(-angle_range, angle_range))
|
rx = np.radians(np.random.uniform(-angle_range, angle_range))
|
||||||
ry = np.radians(np.random.uniform(-angle_range, angle_range))
|
ry = np.radians(np.random.uniform(-angle_range, angle_range))
|
||||||
rz = np.radians(np.random.uniform(-angle_range, angle_range))
|
rz = np.radians(np.random.uniform(-angle_range, angle_range))
|
||||||
return np.array([rx, ry, rz, tx, ty, scale])
|
return np.array([tx, ty, rx, ry, rz, scale])
|
||||||
|
|
||||||
|
|
||||||
def homography_params_to_matrix(params, K):
|
def homography_params_to_matrix(params, K):
|
||||||
rx, ry, rz, tx, ty, scale = params
|
tx, ty, rx, ry, rz, scale = params
|
||||||
cy, sy = np.cos(rz), np.sin(rz)
|
cy, sy = np.cos(rz), np.sin(rz)
|
||||||
cp, sp = np.cos(ry), np.sin(ry)
|
cp, sp = np.cos(ry), np.sin(ry)
|
||||||
cr, sr = np.cos(rx), np.sin(rx)
|
cr, sr = np.cos(rx), np.sin(rx)
|
||||||
@@ -52,4 +52,4 @@ def matrix_to_homography_params(H, K):
|
|||||||
r20, r21 = E[2, 0], E[2, 1]
|
r20, r21 = E[2, 0], E[2, 1]
|
||||||
ry = np.arctan2(r20, r21)
|
ry = np.arctan2(r20, r21)
|
||||||
rx = np.arctan2(-E[1, 2], E[1, 1])
|
rx = np.arctan2(-E[1, 2], E[1, 1])
|
||||||
return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32)
|
return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)
|
||||||
|
|||||||
Reference in New Issue
Block a user