diff --git a/models/SiaN/notebook.gen.ipynb b/models/SiaN/notebook.gen.ipynb index efdfc68..6c2d035 100644 --- a/models/SiaN/notebook.gen.ipynb +++ b/models/SiaN/notebook.gen.ipynb @@ -42,9 +42,9 @@ " \"batch_size\": 32,\n", " \"train_split\": 0.8,\n", " \"num_workers\": 0,\n", - " \"epochs\": 100,\n", + " \"epochs\": 10,\n", " \"learning_rate\": 2e-4,\n", - " \"dropout_rate\": 0.3,\n", + " \"dropout_rate\": 0.5,\n", " \"backbone\": \"resnet18\",\n", " \"output_dir\": r\"C:\\Users\\admin\\Projects\\autopilot\\models\\SiaN\\runs\",\n", " \"save_every_n_epochs\": 15,\n", @@ -62,11 +62,11 @@ " rx = np.radians(np.random.uniform(-angle_range, angle_range))\n", " ry = np.radians(np.random.uniform(-angle_range, angle_range))\n", " rz = np.radians(np.random.uniform(-angle_range, angle_range))\n", - " return np.array([rx, ry, rz, tx, ty, scale])\n", + " return np.array([tx, ty, rx, ry, rz, scale])\n", "\n", "\n", "def homography_params_to_matrix(params, K):\n", - " rx, ry, rz, tx, ty, scale = params\n", + " tx, ty, rx, ry, rz, scale = params\n", " cy, sy = np.cos(rz), np.sin(rz)\n", " cp, sp = np.cos(ry), np.sin(ry)\n", " cr, sr = np.cos(rx), np.sin(rx)\n", @@ -87,7 +87,7 @@ " r20, r21 = E[2, 0], E[2, 1]\n", " ry = np.arctan2(r20, r21)\n", " rx = np.arctan2(-E[1, 2], E[1, 1])\n", - " return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32)\n", + " return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)\n", "\n" ] }, @@ -189,7 +189,7 @@ " else:\n", " google_img = self._google_images[idx]\n", " yandex_img = self._yandex_images[idx]\n", - " target_params = np.zeros(6, dtype=np.float32)\n", + " target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32)\n", " target_matrix = np.eye(3, dtype=np.float32)\n", "\n", " google_img = Image.fromarray(google_img)\n", @@ -249,6 +249,12 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "\n", + "def angular_difference(pred_angles, target_angles):\n", + " diff = pred_angles - target_angles\n", + " diff = torch.atan2(torch.sin(diff), torch.cos(diff))\n", + " return torch.abs(diff)\n", "\n", "\n", "class HomographyCNN6(nn.Module):\n", @@ -263,31 +269,53 @@ " nn.Linear(self.feature_dim * 4, 512),\n", " nn.ReLU(inplace=True),\n", " nn.Dropout(dropout_rate),\n", + " nn.Linear(1024, 512),\n", + " nn.ReLU(inplace=True),\n", + " nn.Dropout(dropout_rate),\n", " nn.Linear(512, 256),\n", " nn.ReLU(inplace=True),\n", " nn.Dropout(dropout_rate),\n", - " nn.Linear(256, 9),\n", + " nn.Linear(512, 9),\n", " )\n", "\n", + " def _normalize_sin_cos(self, _sin, _cos):\n", + " _len = torch.sqrt(_sin ** 2 + _cos ** 2)\n", + " return _sin / _len, _cos / _len\n", + "\n", " def forward(self, img1, img2):\n", " f1 = self.backbone(img1)\n", " f2 = self.backbone(img2)\n", " combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1)\n", + "\n", + " combined[:, (0, 1)] = torch.tanh(combined[:, (0, 1)]) * 10 # [-10; 10]\n", + " combined[:, (2, 3)] = self._normalize_sin_cos(torch.tanh(combined[:, 2]), torch.tanh(combined[:, 3]))\n", + " combined[:, (4, 5)] = self._normalize_sin_cos(torch.tanh(combined[:, 4]), torch.tanh(combined[:, 5]))\n", + " combined[:, (6, 7)] = self._normalize_sin_cos(torch.tanh(combined[:, 6]), torch.tanh(combined[:, 7]))\n", + " \n", " return self.head(combined)\n", "\n", " def decode_output(self, output):\n", - " tx, ty = output[:, 0], output[:, 1]\n", - " sin1, cos1 = torch.tanh(output[:, 2]), torch.tanh(output[:, 3])\n", - " sin2, cos2 = torch.tanh(output[:, 4]), torch.tanh(output[:, 5])\n", - " sin3, cos3 = torch.tanh(output[:, 6]), torch.tanh(output[:, 7])\n", + " tx = output[:, 0]\n", + " ty = output[:, 1]\n", " scale = output[:, 8]\n", "\n", - " angle1 = torch.atan2(sin1, cos1)\n", - " angle2 = torch.atan2(sin2, cos2)\n", - " angle3 = torch.atan2(sin3, cos3)\n", + " angle1 = torch.atan2(output[:, 2], output[:, 3])\n", + " angle2 = torch.atan2(output[:, 4], output[:, 5])\n", + " angle3 = torch.atan2(output[:, 6], output[:, 7])\n", "\n", " return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1)\n", "\n", + " def get_components(self, output):\n", + " decoded = self.decode_output(output)\n", + " return {\n", + " \"tx\": decoded[:, 0],\n", + " \"ty\": decoded[:, 1],\n", + " \"rx\": decoded[:, 2],\n", + " \"ry\": decoded[:, 3],\n", + " \"rz\": decoded[:, 4],\n", + " \"scale\": decoded[:, 5],\n", + " }\n", + "\n", "\n", "class HomographyLoss6(nn.Module):\n", " def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0):\n", @@ -301,63 +329,50 @@ " tx_loss = self.criterion(pred[:, 0], target[:, 0])\n", " ty_loss = self.criterion(pred[:, 1], target[:, 1])\n", "\n", - " sin1_pred, cos1_pred = pred[:, 2], pred[:, 3]\n", - " sin2_pred, cos2_pred = pred[:, 4], pred[:, 5]\n", - " sin3_pred, cos3_pred = pred[:, 6], pred[:, 7]\n", + " sin_rx_pred = pred[:, 2]\n", + " cos_rx_pred = pred[:, 3]\n", + " sin_ry_pred = pred[:, 4]\n", + " cos_ry_pred = pred[:, 5]\n", + " sin_rz_pred = pred[:, 6]\n", + " cos_rz_pred = pred[:, 7]\n", "\n", - " sin1_target = torch.sin(target[:, 2])\n", - " cos1_target = torch.cos(target[:, 2])\n", - " sin2_target = torch.sin(target[:, 3])\n", - " cos2_target = torch.cos(target[:, 3])\n", - " sin3_target = torch.sin(target[:, 4])\n", - " cos3_target = torch.cos(target[:, 4])\n", + " sin_rx_target = torch.sin(target[:, 2])\n", + " cos_rx_target = torch.cos(target[:, 2])\n", + " sin_ry_target = torch.sin(target[:, 3])\n", + " cos_ry_target = torch.cos(target[:, 3])\n", + " sin_rz_target = torch.sin(target[:, 4])\n", + " cos_rz_target = torch.cos(target[:, 4])\n", "\n", - " sin1_pred_t = torch.tanh(sin1_pred)\n", - " cos1_pred_t = torch.tanh(cos1_pred)\n", - " sin2_pred_t = torch.tanh(sin2_pred)\n", - " cos2_pred_t = torch.tanh(cos2_pred)\n", - " sin3_pred_t = torch.tanh(sin3_pred)\n", - " cos3_pred_t = torch.tanh(cos3_pred)\n", - " \n", - " angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean()\n", - " angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean()\n", - " angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean()\n", + " dot_rx = sin_rx_pred * sin_rx_target + cos_rx_pred * cos_rx_target\n", + " dot_ry = sin_ry_pred * sin_ry_target + cos_ry_pred * cos_ry_target\n", + " dot_rz = sin_rz_pred * sin_rz_target + cos_rz_pred * cos_rz_target\n", + "\n", + " rx_loss = (1 - dot_rx).mean()\n", + " ry_loss = (1 - dot_ry).mean()\n", + " rz_loss = (1 - dot_rz).mean()\n", "\n", " scale_loss = self.criterion(pred[:, 8], target[:, 5])\n", "\n", " total_loss = (\n", " self.trans_loss_weight * (tx_loss + ty_loss) +\n", - " self.angle_loss_weight * (angle1_loss + angle2_loss + angle3_loss) +\n", + " self.angle_loss_weight * (rx_loss + ry_loss + rz_loss) +\n", " self.scale_loss_weight * scale_loss\n", " )\n", "\n", " return total_loss\n", "\n", " def compute_mse_components(self, pred, target):\n", - " tx_mse = self.criterion(pred[:, 0], target[:, 0]).item()\n", - " ty_mse = self.criterion(pred[:, 1], target[:, 1]).item()\n", + " decoded = self.decode_output(pred)\n", + " tx_mse = self.criterion(decoded[:, 0], target[:, 0]).item()\n", + " ty_mse = self.criterion(decoded[:, 1], target[:, 1]).item()\n", "\n", - " sin1_target = torch.sin(target[:, 2])\n", - " cos1_target = torch.cos(target[:, 2])\n", - " sin2_target = torch.sin(target[:, 3])\n", - " cos2_target = torch.cos(target[:, 3])\n", - " sin3_target = torch.sin(target[:, 4])\n", - " cos3_target = torch.cos(target[:, 4])\n", + " rx_mse = angular_difference(decoded[:, 2], target[:, 2]).item()\n", + " ry_mse = angular_difference(decoded[:, 3], target[:, 3]).item()\n", + " rz_mse = angular_difference(decoded[:, 4], target[:, 4]).item()\n", "\n", - " sin1_pred_t = torch.tanh(pred[:, 2])\n", - " cos1_pred_t = torch.tanh(pred[:, 3])\n", - " sin2_pred_t = torch.tanh(pred[:, 4])\n", - " cos2_pred_t = torch.tanh(pred[:, 5])\n", - " sin3_pred_t = torch.tanh(pred[:, 6])\n", - " cos3_pred_t = torch.tanh(pred[:, 7])\n", - " \n", - " angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean().item()\n", - " angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean().item()\n", - " angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean().item()\n", + " scale_mse = self.criterion(decoded[:, 5], target[:, 5]).item()\n", "\n", - " scale_mse = self.criterion(pred[:, 8], target[:, 5]).item()\n", - "\n", - " avg_angle_loss = (angle1_loss + angle2_loss + angle3_loss) / 3\n", + " avg_angle_loss = (rx_mse + ry_mse + rz_mse) / 3\n", "\n", " return {\n", " 'trans': (tx_mse + ty_mse) / 2,\n", @@ -393,7 +408,7 @@ " self.val_loader = val_loader\n", " self.device = device\n", " self.criterion = HomographyLoss6()\n", - " self.optimizer = optim.Adam(model.parameters(), lr=config[\"learning_rate\"])\n", + " self.optimizer = optim.Adam(model.parameters(), lr=config[\"learning_rate\"], weight_decay=1e-4)\n", " self.writer = None\n", " self.best_val_loss = float(\"inf\")\n", " self.train_losses = []\n", @@ -424,7 +439,8 @@ " total_loss += loss.item() * google_img.size(0)\n", " total_samples += google_img.size(0)\n", " \n", - " mse_components = self.criterion.compute_mse_components(output, target)\n", + " decoded_output = self.model.decode_output(output)\n", + " mse_components = self.criterion.compute_mse_components(decoded_output, target)\n", " mse_trans_sum += mse_components['trans'] * google_img.size(0)\n", " mse_angle_sum += mse_components['angle'] * google_img.size(0)\n", " mse_scale_sum += mse_components['scale'] * google_img.size(0)\n", @@ -447,11 +463,12 @@ " yandex_img = batch[\"yandex_img\"].to(self.device)\n", " target = batch[\"homography_params\"].to(self.device)\n", " output = self.model(google_img, yandex_img)\n", + " decoded_output = self.model.decode_output(output)\n", " loss = self.criterion(output, target)\n", " total_loss += loss.item() * google_img.size(0)\n", " total_samples += google_img.size(0)\n", " \n", - " mse_components = self.criterion.compute_mse_components(output, target)\n", + " mse_components = self.criterion.compute_mse_components(decoded_output, target)\n", " mse_trans_sum += mse_components['trans'] * google_img.size(0)\n", " mse_angle_sum += mse_components['angle'] * google_img.size(0)\n", " mse_scale_sum += mse_components['scale'] * google_img.size(0)\n", @@ -513,12 +530,6 @@ "os.makedirs(IMG_DIR, exist_ok=True)\n", "\n", "\n", - "def angular_difference(pred_angles, target_angles):\n", - " diff = pred_angles - target_angles\n", - " diff = torch.atan2(torch.sin(diff), torch.cos(diff))\n", - " return torch.abs(diff)\n", - "\n", - "\n", "def analyze_training(trainer):\n", " print(\"=== Training Analysis ===\\n\")\n", "\n", @@ -538,37 +549,54 @@ " n_samples = 50\n", " names = [\"tx\", \"ty\", \"rx\", \"ry\", \"rz\", \"scale\"]\n", " \n", + " _, val_loader_for_analysis = create_data_loaders(\n", + " root_dir=config[\"data_dir\"],\n", + " batch_size=config[\"batch_size\"],\n", + " train_split=config[\"train_split\"],\n", + " num_workers=config[\"num_workers\"],\n", + " image_size=config[\"image_size\"],\n", + " augment_train=True,\n", + " cache_level=0,\n", + " )\n", + " \n", " with torch.no_grad():\n", " all_errors = [[] for _ in range(6)]\n", " all_targets = [[] for _ in range(6)]\n", " all_preds = [[] for _ in range(6)]\n", " \n", - " for i in range(n_samples):\n", - " try:\n", - " batch = next(iter(trainer.val_loader))\n", - " except StopIteration:\n", + " sample_count = 0\n", + " for batch in val_loader_for_analysis:\n", + " if sample_count >= n_samples:\n", " break\n", + " \n", " google_img = batch[\"google_img\"].to(trainer.device)\n", " yandex_img = batch[\"yandex_img\"].to(trainer.device)\n", " target_params = batch[\"homography_params\"].to(trainer.device)\n", " pred_params = trainer.model(google_img, yandex_img)\n", " decoded_pred = trainer.model.decode_output(pred_params)\n", " \n", - " tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).item()\n", - " ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).item()\n", - " rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).item()\n", - " ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).item()\n", - " rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).item()\n", - " scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).item()\n", - " \n", - " errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]\n", - " targets = target_params[0].cpu().numpy()\n", - " preds = decoded_pred[0].cpu().numpy()\n", - " \n", - " for j in range(6):\n", - " all_errors[j].append(errors[j])\n", - " all_targets[j].append(targets[j])\n", - " all_preds[j].append(preds[j])\n", + " batch_size = google_img.size(0)\n", + " for i in range(batch_size):\n", + " if sample_count >= n_samples:\n", + " break\n", + " \n", + " tx_error = torch.abs(decoded_pred[i, 0] - target_params[i, 0]).item()\n", + " ty_error = torch.abs(decoded_pred[i, 1] - target_params[i, 1]).item()\n", + " rx_error = angular_difference(decoded_pred[i, 2], target_params[i, 2]).item()\n", + " ry_error = angular_difference(decoded_pred[i, 3], target_params[i, 3]).item()\n", + " rz_error = angular_difference(decoded_pred[i, 4], target_params[i, 4]).item()\n", + " scale_error = torch.abs(decoded_pred[i, 5] - target_params[i, 5]).item()\n", + " \n", + " errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]\n", + " target_reordered = target_params[i].cpu().numpy()\n", + " pred_reordered = decoded_pred[i].cpu().numpy()\n", + " \n", + " for j in range(6):\n", + " all_errors[j].append(errors[j])\n", + " all_targets[j].append(target_reordered[j])\n", + " all_preds[j].append(pred_reordered[j])\n", + " \n", + " sample_count += 1\n", " \n", " mean_errors = [np.mean(all_errors[i]) for i in range(6)]\n", " std_errors = [np.std(all_errors[i]) for i in range(6)]\n", @@ -618,7 +646,7 @@ " for j in range(6):\n", " row = j // 3\n", " col = j % 3\n", - " axes[row, col].bar(range(n_samples), all_errors[j], color=\"steelblue\", alpha=0.7)\n", + " axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color=\"steelblue\", alpha=0.7)\n", " axes[row, col].set_xlabel(\"Sample\")\n", " axes[row, col].set_ylabel(\"Absolute Error\")\n", " axes[row, col].set_title(f\"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}\")\n", @@ -657,63 +685,70 @@ " n_vis_samples = 20\n", " \n", " with torch.no_grad():\n", - " for sample_idx in range(n_vis_samples):\n", - " try:\n", - " batch = next(iter(trainer.val_loader))\n", - " except StopIteration:\n", + " vis_count = 0\n", + " for batch in val_loader_for_analysis:\n", + " if vis_count >= n_vis_samples:\n", " break\n", - " google_img = batch[\"google_img\"].to(trainer.device)\n", - " yandex_img = batch[\"yandex_img\"].to(trainer.device)\n", - " target_params = batch[\"homography_params\"].to(trainer.device)\n", - " pred_params = trainer.model(google_img, yandex_img)\n", - " decoded_pred = trainer.model.decode_output(pred_params)\n", + " batch_size = batch[\"google_img\"].size(0)\n", " \n", - " tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).cpu().numpy()\n", - " ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).cpu().numpy()\n", - " rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).cpu().numpy()\n", - " ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).cpu().numpy()\n", - " rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).cpu().numpy()\n", - " scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).cpu().numpy()\n", - " \n", - " errors = np.array([tx_error[0], ty_error[0], rx_error[0], ry_error[0], rz_error[0], scale_error[0]])\n", - " targets = target_params[0].cpu().numpy()\n", - " preds = decoded_pred[0].cpu().numpy()\n", - " \n", - " fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", - " \n", - " axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))\n", - " axes[0, 0].set_title(f\"Google Image\")\n", - " axes[0, 0].axis(\"off\")\n", - " \n", - " axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))\n", - " axes[0, 1].set_title(f\"Yandex Image\")\n", - " axes[0, 1].axis(\"off\")\n", - " \n", - " x_pos = np.arange(6)\n", - " width = 0.35\n", - " axes[1, 0].bar(x_pos - width/2, targets, width, label=\"Target\", color=\"steelblue\", alpha=0.8)\n", - " axes[1, 0].bar(x_pos + width/2, preds, width, label=\"Predicted\", color=\"coral\", alpha=0.8)\n", - " axes[1, 0].set_xticks(x_pos)\n", - " axes[1, 0].set_xticklabels(names)\n", - " axes[1, 0].set_ylabel(\"Parameter Value\")\n", - " axes[1, 0].set_title(\"Target vs Predicted\")\n", - " axes[1, 0].legend()\n", - " axes[1, 0].grid(True, alpha=0.3, axis=\"y\")\n", - " \n", - " axes[1, 1].bar(x_pos, errors, color=[\"c\", \"m\", \"y\", \"g\", \"b\", \"r\"], alpha=0.8)\n", - " axes[1, 1].set_xticks(x_pos)\n", - " axes[1, 1].set_xticklabels(names)\n", - " axes[1, 1].set_ylabel(\"Absolute Error\")\n", - " axes[1, 1].set_title(f\"Prediction Error (Mean: {np.mean(errors):.4f})\")\n", - " axes[1, 1].grid(True, alpha=0.3, axis=\"y\")\n", - " for i, e in enumerate(errors):\n", - " axes[1, 1].text(i, e + 0.01, f\"{e:.3f}\", ha=\"center\", va=\"bottom\", fontsize=8)\n", - " \n", - " plt.suptitle(f\"Sample {sample_idx + 1}\", fontsize=14)\n", - " plt.tight_layout()\n", - " plt.savefig(os.path.join(IMG_DIR, f\"prediction_sample_{sample_idx + 1:02d}.png\"), dpi=100)\n", - " plt.show()\n", - " print(f\"Saved prediction_sample_{sample_idx + 1:02d}.png\")\n", + " for i in range(batch_size):\n", + " if vis_count >= n_vis_samples:\n", + " break\n", + " \n", + " google_img = batch[\"google_img\"][i:i+1].to(trainer.device)\n", + " yandex_img = batch[\"yandex_img\"][i:i+1].to(trainer.device)\n", + " target_params = batch[\"homography_params\"][i:i+1].to(trainer.device)\n", + " pred_params = trainer.model(google_img, yandex_img)\n", + " decoded_pred = trainer.model.decode_output(pred_params)\n", + " \n", + " tx_error = torch.abs(decoded_pred[0, 0] - target_params[0, 0]).item()\n", + " ty_error = torch.abs(decoded_pred[0, 1] - target_params[0, 1]).item()\n", + " rx_error = angular_difference(decoded_pred[0, 2], target_params[0, 2]).item()\n", + " ry_error = angular_difference(decoded_pred[0, 3], target_params[0, 3]).item()\n", + " rz_error = angular_difference(decoded_pred[0, 4], target_params[0, 4]).item()\n", + " scale_error = torch.abs(decoded_pred[0, 5] - target_params[0, 5]).item()\n", + " \n", + " errors = np.array([tx_error, ty_error, rx_error, ry_error, rz_error, scale_error])\n", + " targets = target_params[0].cpu().numpy()\n", + " preds = decoded_pred[0].cpu().numpy()\n", + " \n", + " fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", + " \n", + " axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0))\n", + " axes[0, 0].set_title(f\"Google Image\")\n", + " axes[0, 0].axis(\"off\")\n", + " \n", + " axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0))\n", + " axes[0, 1].set_title(f\"Yandex Image\")\n", + " axes[0, 1].axis(\"off\")\n", + " \n", + " x_pos = np.arange(6)\n", + " width = 0.35\n", + " axes[1, 0].bar(x_pos - width/2, targets, width, label=\"Target\", color=\"steelblue\", alpha=0.8)\n", + " axes[1, 0].bar(x_pos + width/2, preds, width, label=\"Predicted\", color=\"coral\", alpha=0.8)\n", + " axes[1, 0].set_xticks(x_pos)\n", + " axes[1, 0].set_xticklabels(names)\n", + " axes[1, 0].set_ylabel(\"Parameter Value\")\n", + " axes[1, 0].set_title(\"Target vs Predicted\")\n", + " axes[1, 0].legend()\n", + " axes[1, 0].grid(True, alpha=0.3, axis=\"y\")\n", + " \n", + " axes[1, 1].bar(x_pos, errors, color=[\"c\", \"m\", \"y\", \"g\", \"b\", \"r\"], alpha=0.8)\n", + " axes[1, 1].set_xticks(x_pos)\n", + " axes[1, 1].set_xticklabels(names)\n", + " axes[1, 1].set_ylabel(\"Absolute Error\")\n", + " axes[1, 1].set_title(f\"Prediction Error (Mean: {np.mean(errors):.4f})\")\n", + " axes[1, 1].grid(True, alpha=0.3, axis=\"y\")\n", + " for i_e, e in enumerate(errors):\n", + " axes[1, 1].text(i_e, e + 0.01, f\"{e:.3f}\", ha=\"center\", va=\"bottom\", fontsize=8)\n", + " \n", + " plt.suptitle(f\"Sample {vis_count + 1}\", fontsize=14)\n", + " plt.tight_layout()\n", + " plt.savefig(os.path.join(IMG_DIR, f\"prediction_sample_{vis_count + 1:02d}.png\"), dpi=100)\n", + " plt.show()\n", + " print(f\"Saved prediction_sample_{vis_count + 1:02d}.png\")\n", + " \n", + " vis_count += 1\n", " \n", " print(f\"\\nPrediction errors over {n_samples} samples:\")\n", " print(f\"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}\")\n", diff --git a/models/SiaN/src/analyze.py b/models/SiaN/src/analyze.py index 3dedd62..07b56a3 100644 --- a/models/SiaN/src/analyze.py +++ b/models/SiaN/src/analyze.py @@ -3,18 +3,14 @@ import torch import numpy as np import matplotlib.pyplot as plt +from .dataloader import create_data_loaders +from .model import angular_difference from .utils import config IMG_DIR = os.path.join(config["output_dir"], "images") os.makedirs(IMG_DIR, exist_ok=True) -def angular_difference(pred_angles, target_angles): - diff = pred_angles - target_angles - diff = torch.atan2(torch.sin(diff), torch.cos(diff)) - return torch.abs(diff) - - def analyze_training(trainer): print("=== Training Analysis ===\n") @@ -34,37 +30,54 @@ def analyze_training(trainer): n_samples = 50 names = ["tx", "ty", "rx", "ry", "rz", "scale"] + _, val_loader_for_analysis = create_data_loaders( + root_dir=config["data_dir"], + batch_size=config["batch_size"], + train_split=config["train_split"], + num_workers=config["num_workers"], + image_size=config["image_size"], + augment_train=True, + cache_level=0, + ) + with torch.no_grad(): all_errors = [[] for _ in range(6)] all_targets = [[] for _ in range(6)] all_preds = [[] for _ in range(6)] - for i in range(n_samples): - try: - batch = next(iter(trainer.val_loader)) - except StopIteration: + sample_count = 0 + for batch in val_loader_for_analysis: + if sample_count >= n_samples: break + google_img = batch["google_img"].to(trainer.device) yandex_img = batch["yandex_img"].to(trainer.device) target_params = batch["homography_params"].to(trainer.device) pred_params = trainer.model(google_img, yandex_img) decoded_pred = trainer.model.decode_output(pred_params) - tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).item() - ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).item() - rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).item() - ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).item() - rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).item() - scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).item() - - errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error] - targets = target_params[0].cpu().numpy() - preds = decoded_pred[0].cpu().numpy() - - for j in range(6): - all_errors[j].append(errors[j]) - all_targets[j].append(targets[j]) - all_preds[j].append(preds[j]) + batch_size = google_img.size(0) + for i in range(batch_size): + if sample_count >= n_samples: + break + + tx_error = torch.abs(decoded_pred[i, 0] - target_params[i, 0]).item() + ty_error = torch.abs(decoded_pred[i, 1] - target_params[i, 1]).item() + rx_error = angular_difference(decoded_pred[i, 2], target_params[i, 2]).item() + ry_error = angular_difference(decoded_pred[i, 3], target_params[i, 3]).item() + rz_error = angular_difference(decoded_pred[i, 4], target_params[i, 4]).item() + scale_error = torch.abs(decoded_pred[i, 5] - target_params[i, 5]).item() + + errors = [tx_error, ty_error, rx_error, ry_error, rz_error, scale_error] + target_reordered = target_params[i].cpu().numpy() + pred_reordered = decoded_pred[i].cpu().numpy() + + for j in range(6): + all_errors[j].append(errors[j]) + all_targets[j].append(target_reordered[j]) + all_preds[j].append(pred_reordered[j]) + + sample_count += 1 mean_errors = [np.mean(all_errors[i]) for i in range(6)] std_errors = [np.std(all_errors[i]) for i in range(6)] @@ -114,7 +127,7 @@ def analyze_training(trainer): for j in range(6): row = j // 3 col = j % 3 - axes[row, col].bar(range(n_samples), all_errors[j], color="steelblue", alpha=0.7) + axes[row, col].bar(range(len(all_errors[j])), all_errors[j], color="steelblue", alpha=0.7) axes[row, col].set_xlabel("Sample") axes[row, col].set_ylabel("Absolute Error") axes[row, col].set_title(f"{names[j]}: Mean={np.mean(all_errors[j]):.4f}, Std={np.std(all_errors[j]):.4f}") @@ -153,63 +166,70 @@ def analyze_training(trainer): n_vis_samples = 20 with torch.no_grad(): - for sample_idx in range(n_vis_samples): - try: - batch = next(iter(trainer.val_loader)) - except StopIteration: + vis_count = 0 + for batch in val_loader_for_analysis: + if vis_count >= n_vis_samples: break - google_img = batch["google_img"].to(trainer.device) - yandex_img = batch["yandex_img"].to(trainer.device) - target_params = batch["homography_params"].to(trainer.device) - pred_params = trainer.model(google_img, yandex_img) - decoded_pred = trainer.model.decode_output(pred_params) + batch_size = batch["google_img"].size(0) - tx_error = torch.abs(decoded_pred[:, 0] - target_params[:, 0]).cpu().numpy() - ty_error = torch.abs(decoded_pred[:, 1] - target_params[:, 1]).cpu().numpy() - rx_error = angular_difference(decoded_pred[:, 2], target_params[:, 2]).cpu().numpy() - ry_error = angular_difference(decoded_pred[:, 3], target_params[:, 3]).cpu().numpy() - rz_error = angular_difference(decoded_pred[:, 4], target_params[:, 4]).cpu().numpy() - scale_error = torch.abs(decoded_pred[:, 5] - target_params[:, 5]).cpu().numpy() - - errors = np.array([tx_error[0], ty_error[0], rx_error[0], ry_error[0], rz_error[0], scale_error[0]]) - targets = target_params[0].cpu().numpy() - preds = decoded_pred[0].cpu().numpy() - - fig, axes = plt.subplots(2, 2, figsize=(12, 10)) - - axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0)) - axes[0, 0].set_title(f"Google Image") - axes[0, 0].axis("off") - - axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0)) - axes[0, 1].set_title(f"Yandex Image") - axes[0, 1].axis("off") - - x_pos = np.arange(6) - width = 0.35 - axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="steelblue", alpha=0.8) - axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="coral", alpha=0.8) - axes[1, 0].set_xticks(x_pos) - axes[1, 0].set_xticklabels(names) - axes[1, 0].set_ylabel("Parameter Value") - axes[1, 0].set_title("Target vs Predicted") - axes[1, 0].legend() - axes[1, 0].grid(True, alpha=0.3, axis="y") - - axes[1, 1].bar(x_pos, errors, color=["c", "m", "y", "g", "b", "r"], alpha=0.8) - axes[1, 1].set_xticks(x_pos) - axes[1, 1].set_xticklabels(names) - axes[1, 1].set_ylabel("Absolute Error") - axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})") - axes[1, 1].grid(True, alpha=0.3, axis="y") - for i, e in enumerate(errors): - axes[1, 1].text(i, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=8) - - plt.suptitle(f"Sample {sample_idx + 1}", fontsize=14) - plt.tight_layout() - plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{sample_idx + 1:02d}.png"), dpi=100) - plt.show() - print(f"Saved prediction_sample_{sample_idx + 1:02d}.png") + for i in range(batch_size): + if vis_count >= n_vis_samples: + break + + google_img = batch["google_img"][i:i+1].to(trainer.device) + yandex_img = batch["yandex_img"][i:i+1].to(trainer.device) + target_params = batch["homography_params"][i:i+1].to(trainer.device) + pred_params = trainer.model(google_img, yandex_img) + decoded_pred = trainer.model.decode_output(pred_params) + + tx_error = torch.abs(decoded_pred[0, 0] - target_params[0, 0]).item() + ty_error = torch.abs(decoded_pred[0, 1] - target_params[0, 1]).item() + rx_error = angular_difference(decoded_pred[0, 2], target_params[0, 2]).item() + ry_error = angular_difference(decoded_pred[0, 3], target_params[0, 3]).item() + rz_error = angular_difference(decoded_pred[0, 4], target_params[0, 4]).item() + scale_error = torch.abs(decoded_pred[0, 5] - target_params[0, 5]).item() + + errors = np.array([tx_error, ty_error, rx_error, ry_error, rz_error, scale_error]) + targets = target_params[0].cpu().numpy() + preds = decoded_pred[0].cpu().numpy() + + fig, axes = plt.subplots(2, 2, figsize=(12, 10)) + + axes[0, 0].imshow(google_img[0].cpu().permute(1, 2, 0)) + axes[0, 0].set_title(f"Google Image") + axes[0, 0].axis("off") + + axes[0, 1].imshow(yandex_img[0].cpu().permute(1, 2, 0)) + axes[0, 1].set_title(f"Yandex Image") + axes[0, 1].axis("off") + + x_pos = np.arange(6) + width = 0.35 + axes[1, 0].bar(x_pos - width/2, targets, width, label="Target", color="steelblue", alpha=0.8) + axes[1, 0].bar(x_pos + width/2, preds, width, label="Predicted", color="coral", alpha=0.8) + axes[1, 0].set_xticks(x_pos) + axes[1, 0].set_xticklabels(names) + axes[1, 0].set_ylabel("Parameter Value") + axes[1, 0].set_title("Target vs Predicted") + axes[1, 0].legend() + axes[1, 0].grid(True, alpha=0.3, axis="y") + + axes[1, 1].bar(x_pos, errors, color=["c", "m", "y", "g", "b", "r"], alpha=0.8) + axes[1, 1].set_xticks(x_pos) + axes[1, 1].set_xticklabels(names) + axes[1, 1].set_ylabel("Absolute Error") + axes[1, 1].set_title(f"Prediction Error (Mean: {np.mean(errors):.4f})") + axes[1, 1].grid(True, alpha=0.3, axis="y") + for i_e, e in enumerate(errors): + axes[1, 1].text(i_e, e + 0.01, f"{e:.3f}", ha="center", va="bottom", fontsize=8) + + plt.suptitle(f"Sample {vis_count + 1}", fontsize=14) + plt.tight_layout() + plt.savefig(os.path.join(IMG_DIR, f"prediction_sample_{vis_count + 1:02d}.png"), dpi=100) + plt.show() + print(f"Saved prediction_sample_{vis_count + 1:02d}.png") + + vis_count += 1 print(f"\nPrediction errors over {n_samples} samples:") print(f"{'Param':<8} {'Mean Error':>12} {'Std Error':>12} {'Min':>8} {'Max':>8}") diff --git a/models/SiaN/src/dataloader.py b/models/SiaN/src/dataloader.py index f021051..4aa204b 100644 --- a/models/SiaN/src/dataloader.py +++ b/models/SiaN/src/dataloader.py @@ -95,7 +95,7 @@ class YaGoDataset(Dataset): else: google_img = self._google_images[idx] yandex_img = self._yandex_images[idx] - target_params = np.zeros(6, dtype=np.float32) + target_params = np.array([0, 0, 0, 0, 0, 1], dtype=np.float32) target_matrix = np.eye(3, dtype=np.float32) google_img = Image.fromarray(google_img) diff --git a/models/SiaN/src/model.py b/models/SiaN/src/model.py index 3711a92..7c81726 100644 --- a/models/SiaN/src/model.py +++ b/models/SiaN/src/model.py @@ -3,6 +3,12 @@ import torch.nn as nn from torchvision import models +def angular_difference(pred_angles, target_angles): + diff = pred_angles - target_angles + diff = torch.atan2(torch.sin(diff), torch.cos(diff)) + return torch.abs(diff) + + class HomographyCNN6(nn.Module): def __init__(self, input_channels=3, backbone_name="resnet18", pretrained=True, dropout_rate=0.3): super().__init__() @@ -15,31 +21,53 @@ class HomographyCNN6(nn.Module): nn.Linear(self.feature_dim * 4, 512), nn.ReLU(inplace=True), nn.Dropout(dropout_rate), + nn.Linear(1024, 512), + nn.ReLU(inplace=True), + nn.Dropout(dropout_rate), nn.Linear(512, 256), nn.ReLU(inplace=True), nn.Dropout(dropout_rate), - nn.Linear(256, 9), + nn.Linear(512, 9), ) + def _normalize_sin_cos(self, _sin, _cos): + _len = torch.sqrt(_sin ** 2 + _cos ** 2) + return _sin / _len, _cos / _len + def forward(self, img1, img2): f1 = self.backbone(img1) f2 = self.backbone(img2) combined = torch.cat([f1, f2, torch.abs(f1 - f2), f1 * f2], dim=1) + + combined[:, (0, 1)] = torch.tanh(combined[:, (0, 1)]) * 10 # [-10; 10] + combined[:, (2, 3)] = self._normalize_sin_cos(torch.tanh(combined[:, 2]), torch.tanh(combined[:, 3])) + combined[:, (4, 5)] = self._normalize_sin_cos(torch.tanh(combined[:, 4]), torch.tanh(combined[:, 5])) + combined[:, (6, 7)] = self._normalize_sin_cos(torch.tanh(combined[:, 6]), torch.tanh(combined[:, 7])) + return self.head(combined) def decode_output(self, output): - tx, ty = output[:, 0], output[:, 1] - sin1, cos1 = torch.tanh(output[:, 2]), torch.tanh(output[:, 3]) - sin2, cos2 = torch.tanh(output[:, 4]), torch.tanh(output[:, 5]) - sin3, cos3 = torch.tanh(output[:, 6]), torch.tanh(output[:, 7]) + tx = output[:, 0] + ty = output[:, 1] scale = output[:, 8] - angle1 = torch.atan2(sin1, cos1) - angle2 = torch.atan2(sin2, cos2) - angle3 = torch.atan2(sin3, cos3) + angle1 = torch.atan2(output[:, 2], output[:, 3]) + angle2 = torch.atan2(output[:, 4], output[:, 5]) + angle3 = torch.atan2(output[:, 6], output[:, 7]) return torch.stack([tx, ty, angle1, angle2, angle3, scale], dim=1) + def get_components(self, output): + decoded = self.decode_output(output) + return { + "tx": decoded[:, 0], + "ty": decoded[:, 1], + "rx": decoded[:, 2], + "ry": decoded[:, 3], + "rz": decoded[:, 4], + "scale": decoded[:, 5], + } + class HomographyLoss6(nn.Module): def __init__(self, angle_loss_weight=1.0, trans_loss_weight=1.0, scale_loss_weight=1.0): @@ -53,63 +81,50 @@ class HomographyLoss6(nn.Module): tx_loss = self.criterion(pred[:, 0], target[:, 0]) ty_loss = self.criterion(pred[:, 1], target[:, 1]) - sin1_pred, cos1_pred = pred[:, 2], pred[:, 3] - sin2_pred, cos2_pred = pred[:, 4], pred[:, 5] - sin3_pred, cos3_pred = pred[:, 6], pred[:, 7] + sin_rx_pred = pred[:, 2] + cos_rx_pred = pred[:, 3] + sin_ry_pred = pred[:, 4] + cos_ry_pred = pred[:, 5] + sin_rz_pred = pred[:, 6] + cos_rz_pred = pred[:, 7] - sin1_target = torch.sin(target[:, 2]) - cos1_target = torch.cos(target[:, 2]) - sin2_target = torch.sin(target[:, 3]) - cos2_target = torch.cos(target[:, 3]) - sin3_target = torch.sin(target[:, 4]) - cos3_target = torch.cos(target[:, 4]) + sin_rx_target = torch.sin(target[:, 2]) + cos_rx_target = torch.cos(target[:, 2]) + sin_ry_target = torch.sin(target[:, 3]) + cos_ry_target = torch.cos(target[:, 3]) + sin_rz_target = torch.sin(target[:, 4]) + cos_rz_target = torch.cos(target[:, 4]) - sin1_pred_t = torch.tanh(sin1_pred) - cos1_pred_t = torch.tanh(cos1_pred) - sin2_pred_t = torch.tanh(sin2_pred) - cos2_pred_t = torch.tanh(cos2_pred) - sin3_pred_t = torch.tanh(sin3_pred) - cos3_pred_t = torch.tanh(cos3_pred) - - angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean() - angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean() - angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean() + dot_rx = sin_rx_pred * sin_rx_target + cos_rx_pred * cos_rx_target + dot_ry = sin_ry_pred * sin_ry_target + cos_ry_pred * cos_ry_target + dot_rz = sin_rz_pred * sin_rz_target + cos_rz_pred * cos_rz_target + + rx_loss = (1 - dot_rx).mean() + ry_loss = (1 - dot_ry).mean() + rz_loss = (1 - dot_rz).mean() scale_loss = self.criterion(pred[:, 8], target[:, 5]) total_loss = ( self.trans_loss_weight * (tx_loss + ty_loss) + - self.angle_loss_weight * (angle1_loss + angle2_loss + angle3_loss) + + self.angle_loss_weight * (rx_loss + ry_loss + rz_loss) + self.scale_loss_weight * scale_loss ) return total_loss def compute_mse_components(self, pred, target): - tx_mse = self.criterion(pred[:, 0], target[:, 0]).item() - ty_mse = self.criterion(pred[:, 1], target[:, 1]).item() + decoded = self.decode_output(pred) + tx_mse = self.criterion(decoded[:, 0], target[:, 0]).item() + ty_mse = self.criterion(decoded[:, 1], target[:, 1]).item() - sin1_target = torch.sin(target[:, 2]) - cos1_target = torch.cos(target[:, 2]) - sin2_target = torch.sin(target[:, 3]) - cos2_target = torch.cos(target[:, 3]) - sin3_target = torch.sin(target[:, 4]) - cos3_target = torch.cos(target[:, 4]) + rx_mse = angular_difference(decoded[:, 2], target[:, 2]).item() + ry_mse = angular_difference(decoded[:, 3], target[:, 3]).item() + rz_mse = angular_difference(decoded[:, 4], target[:, 4]).item() - sin1_pred_t = torch.tanh(pred[:, 2]) - cos1_pred_t = torch.tanh(pred[:, 3]) - sin2_pred_t = torch.tanh(pred[:, 4]) - cos2_pred_t = torch.tanh(pred[:, 5]) - sin3_pred_t = torch.tanh(pred[:, 6]) - cos3_pred_t = torch.tanh(pred[:, 7]) - - angle1_loss = (1 - (sin1_pred_t * sin1_target + cos1_pred_t * cos1_target)).mean().item() - angle2_loss = (1 - (sin2_pred_t * sin2_target + cos2_pred_t * cos2_target)).mean().item() - angle3_loss = (1 - (sin3_pred_t * sin3_target + cos3_pred_t * cos3_target)).mean().item() + scale_mse = self.criterion(decoded[:, 5], target[:, 5]).item() - scale_mse = self.criterion(pred[:, 8], target[:, 5]).item() - - avg_angle_loss = (angle1_loss + angle2_loss + angle3_loss) / 3 + avg_angle_loss = (rx_mse + ry_mse + rz_mse) / 3 return { 'trans': (tx_mse + ty_mse) / 2, diff --git a/models/SiaN/src/train.py b/models/SiaN/src/train.py index 290b3b0..a9582ee 100644 --- a/models/SiaN/src/train.py +++ b/models/SiaN/src/train.py @@ -18,7 +18,7 @@ class HomographyTrainer: self.val_loader = val_loader self.device = device self.criterion = HomographyLoss6() - self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"]) + self.optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"], weight_decay=1e-4) self.writer = None self.best_val_loss = float("inf") self.train_losses = [] @@ -49,7 +49,8 @@ class HomographyTrainer: total_loss += loss.item() * google_img.size(0) total_samples += google_img.size(0) - mse_components = self.criterion.compute_mse_components(output, target) + decoded_output = self.model.decode_output(output) + mse_components = self.criterion.compute_mse_components(decoded_output, target) mse_trans_sum += mse_components['trans'] * google_img.size(0) mse_angle_sum += mse_components['angle'] * google_img.size(0) mse_scale_sum += mse_components['scale'] * google_img.size(0) @@ -72,11 +73,12 @@ class HomographyTrainer: yandex_img = batch["yandex_img"].to(self.device) target = batch["homography_params"].to(self.device) output = self.model(google_img, yandex_img) + decoded_output = self.model.decode_output(output) loss = self.criterion(output, target) total_loss += loss.item() * google_img.size(0) total_samples += google_img.size(0) - mse_components = self.criterion.compute_mse_components(output, target) + mse_components = self.criterion.compute_mse_components(decoded_output, target) mse_trans_sum += mse_components['trans'] * google_img.size(0) mse_angle_sum += mse_components['angle'] * google_img.size(0) mse_scale_sum += mse_components['scale'] * google_img.size(0) diff --git a/models/SiaN/src/utils.py b/models/SiaN/src/utils.py index d83d906..1be134d 100644 --- a/models/SiaN/src/utils.py +++ b/models/SiaN/src/utils.py @@ -7,9 +7,9 @@ config = { "batch_size": 32, "train_split": 0.8, "num_workers": 0, - "epochs": 100, + "epochs": 10, "learning_rate": 2e-4, - "dropout_rate": 0.3, + "dropout_rate": 0.5, "backbone": "resnet18", "output_dir": r"C:\Users\admin\Projects\autopilot\models\SiaN\runs", "save_every_n_epochs": 15, @@ -27,11 +27,11 @@ def generate_random_homography_params(angle_range=10, translation_range=0.1, sca rx = np.radians(np.random.uniform(-angle_range, angle_range)) ry = np.radians(np.random.uniform(-angle_range, angle_range)) rz = np.radians(np.random.uniform(-angle_range, angle_range)) - return np.array([rx, ry, rz, tx, ty, scale]) + return np.array([tx, ty, rx, ry, rz, scale]) def homography_params_to_matrix(params, K): - rx, ry, rz, tx, ty, scale = params + tx, ty, rx, ry, rz, scale = params cy, sy = np.cos(rz), np.sin(rz) cp, sp = np.cos(ry), np.sin(ry) cr, sr = np.cos(rx), np.sin(rx) @@ -52,4 +52,4 @@ def matrix_to_homography_params(H, K): r20, r21 = E[2, 0], E[2, 1] ry = np.arctan2(r20, r21) rx = np.arctan2(-E[1, 2], E[1, 1]) - return np.array([rx, ry, rz, tx, ty, scale], dtype=np.float32) + return np.array([tx, ty, rx, ry, rz, scale], dtype=np.float32)