Files
autopilot/test_projection.ipynb
2026-01-10 17:48:54 +03:00

988 lines
30 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 57,
"id": "d2631d69",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Homography: [[ 1.45631545 -0.00167508 62.16857114]\n",
" [ 0.02150464 1.44269413 -56.2912924 ]\n",
" [ 0.00003243 0.00000175 1. ]]\n",
"4 [[[ 0.88083101 -0.05835966 0.46982006]\n",
" [-0.05373063 0.97363845 0.2216781 ]\n",
" [-0.47037193 -0.22050467 0.85447524]]\n",
"\n",
" [[ 0.88083101 -0.05835966 0.46982006]\n",
" [-0.05373063 0.97363845 0.2216781 ]\n",
" [-0.47037193 -0.22050467 0.85447524]]\n",
"\n",
" [[ 0.99993156 -0.0053206 -0.0104192 ]\n",
" [ 0.00528968 0.99998153 -0.00299237]\n",
" [ 0.01043493 0.00293705 0.99994124]]\n",
"\n",
" [[ 0.99993156 -0.0053206 -0.0104192 ]\n",
" [ 0.00528968 0.99998153 -0.00299237]\n",
" [ 0.01043493 0.00293705 0.99994124]]] [[[ 0.14085899]\n",
" [ 0.06967199]\n",
" [ 0.54817935]]\n",
"\n",
" [[-0.14085899]\n",
" [-0.06967199]\n",
" [-0.54817935]]\n",
"\n",
" [[-0.4413341 ]\n",
" [-0.20542618]\n",
" [ 0.2970191 ]]\n",
"\n",
" [[ 0.4413341 ]\n",
" [ 0.20542618]\n",
" [-0.2970191 ]]] [[[ 0.87244355]\n",
" [ 0.40302658]\n",
" [-0.27642691]]\n",
"\n",
" [[-0.87244355]\n",
" [-0.40302658]\n",
" [ 0.27642691]]\n",
"\n",
" [[-0.00858977]\n",
" [-0.00845358]\n",
" [-0.99992737]]\n",
"\n",
" [[ 0.00858977]\n",
" [ 0.00845358]\n",
" [ 0.99992737]]]\n",
"int32\n",
"[0 3]\n",
"[[1 2]]\n",
"Truth T: [[[ 1.26688695 -0.07978837 19.1537688 ]\n",
" [ -0.07694267 1.4040634 8.53253905]\n",
" [ 0.69082299 0.3191267 1.22072533]]\n",
"\n",
" [[ 1.26688695 -0.07978837 19.1537688 ]\n",
" [ -0.07694267 1.4040634 8.53253905]\n",
" [ 0.69082299 0.3191267 1.22072533]]\n",
"\n",
" [[ 1.44503941 0.0053461 221.32627361]\n",
" [ 0.00250041 1.44206795 101.8745381 ]\n",
" [ -0.00373716 -0.0036779 1.00456832]]\n",
"\n",
" [[ 1.44503941 0.0053461 221.32627361]\n",
" [ 0.00250041 1.44206795 101.8745381 ]\n",
" [ -0.00373716 -0.0036779 1.00456832]]]\n",
"Possible Solutions: [0 3]\n",
"[[[ 0.88083101 -0.05835966 0.46982006]\n",
" [-0.05373063 0.97363845 0.2216781 ]\n",
" [-0.47037193 -0.22050467 0.85447524]]\n",
"\n",
" [[ 0.88083101 -0.05835966 0.46982006]\n",
" [-0.05373063 0.97363845 0.2216781 ]\n",
" [-0.47037193 -0.22050467 0.85447524]]\n",
"\n",
" [[ 0.99993156 -0.0053206 -0.0104192 ]\n",
" [ 0.00528968 0.99998153 -0.00299237]\n",
" [ 0.01043493 0.00293705 0.99994124]]\n",
"\n",
" [[ 0.99993156 -0.0053206 -0.0104192 ]\n",
" [ 0.00528968 0.99998153 -0.00299237]\n",
" [ 0.01043493 0.00293705 0.99994124]]]\n",
"Translations: [[[ 49.3006458 ]\n",
" [ 24.38519785]\n",
" [ 191.86277366]]\n",
"\n",
" [[ -49.3006458 ]\n",
" [ -24.38519785]\n",
" [-191.86277366]]\n",
"\n",
" [[-154.46693485]\n",
" [ -71.89916133]\n",
" [ 103.95668379]]\n",
"\n",
" [[ 154.46693485]\n",
" [ 71.89916133]\n",
" [-103.95668379]]]\n",
"normals: [[[ 0.87244355]\n",
" [ 0.40302658]\n",
" [-0.27642691]]\n",
"\n",
" [[-0.87244355]\n",
" [-0.40302658]\n",
" [ 0.27642691]]\n",
"\n",
" [[-0.00858977]\n",
" [-0.00845358]\n",
" [-0.99992737]]\n",
"\n",
" [[ 0.00858977]\n",
" [ 0.00845358]\n",
" [ 0.99992737]]]\n",
"[[-14.54372108 28.02261667 3.79060488]\n",
" [-14.54372108 28.02261667 3.79060488]\n",
" [ 0.17145973 -0.59698726 0.30486582]\n",
" [ 0.17145973 -0.59698726 0.30486582]]\n"
]
},
{
"data": {
"text/plain": [
"np.float64(17262.208134164426)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pathlib import Path\n",
"import math\n",
"import cv2\n",
"\n",
"from scipy.spatial.transform import Rotation\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from PIL import Image\n",
"\n",
"from autopilot import AutoPilot\n",
"from vision_chunk import VisionChunk\n",
"from simulator import Simulator\n",
"from position import Position\n",
"from utility import cv2_to_pil\n",
"from visualization import VisualizationManager\n",
"\n",
"chunk = VisionChunk.load_image(Path('images') / 'photo_1.png')\n",
"image_width = chunk.image.size[0]\n",
"center = image_width / 2\n",
"\n",
"focus_length = image_width / 2\n",
"K = np.array([\n",
" [focus_length, 0, center],\n",
" [0, focus_length, center],\n",
" [0, 0, 1],\n",
"])\n",
"np.set_printoptions(suppress=True)\n",
"\n",
"def get_image(chunk: VisionChunk, geo: Position) -> tuple[Image.Image, np.ndarray]:\n",
" H = geo.get_homography_matrix(K)\n",
" # R = np.array(geo.get_rotation_matrix())\n",
" # Z = np.eye(3)\n",
" # Z[2, 2] = geo.z\n",
" # T = np.eye(3)\n",
" # T[0, 2] = geo.x\n",
" # T[1, 2] = geo.y\n",
" # H = K @ Z @ R @ np.linalg.inv(K)\n",
" warped = cv2.warpPerspective(np.array(chunk.image), H, (image_width, image_width), flags=cv2.INTER_LINEAR)\n",
" plt.imshow(warped)\n",
" plt.show()\n",
" return cv2_to_pil(warped), H\n",
"\n",
"\n",
"x = 150 / focus_length\n",
"y = 70 / focus_length\n",
"prev_geo = Position(0, 0, 1, math.radians(0), math.radians(0), math.radians(0))\n",
"cur_geo = Position(x, y, 0.7, math.radians(0), math.radians(0), math.radians(0))\n",
"image1, H1 = get_image(chunk, prev_geo)\n",
"image2, H2 = get_image(chunk, cur_geo)\n",
"\n",
"chunk1 = VisionChunk(image1, 'orb')\n",
"chunk2 = VisionChunk(image2, 'orb')\n",
"\n",
"autopilot = AutoPilot()\n",
"# pts_src, pts_dst = autopilot.calculate_optical_flow(chunk1, chunk2)\n",
"pts_src, pts_dst, *_ = chunk1.detect_and_match_keypoints(chunk2)\n",
"\n",
"\n",
"\n",
"# print(pts_src.shape)\n",
"H, _ = cv2.findHomography(pts_src, pts_dst, cv2.RANSAC, 0.5, maxIters=2000)\n",
"H = H @ H1\n",
"# H = np.linalg.inv(H)\n",
"print(\"Homography:\", H)\n",
"# print(\"Rev Homography:\", np.linalg.inv(H))\n",
"# num, R, t, n\n",
"num, R, t, n = cv2.decomposeHomographyMat(H, K) \n",
"R = np.array(R)\n",
"t = np.array(t)\n",
"n = np.array(n)\n",
"print(num, R, t, n)\n",
"\n",
"# print(\"Inversed decompose:\", cv2.decomposeHomographyMat(np.linalg.inv(H), K))\n",
"\n",
"inds = cv2.filterHomographyDecompByVisibleRefpoints(R, n, pts_src, pts_dst)\n",
"if inds is not None:\n",
" inds = inds.flatten()\n",
" print(inds.dtype)\n",
" print(inds.flatten())\n",
" # R = R[inds]\n",
" # t = t[inds]\n",
" # n = n[inds]\n",
"else:\n",
" print(\"[INDS]: None :(\")\n",
"\n",
"print(np.array([\n",
" [1, 2],\n",
" [3 ,4]\n",
"])[[0]])\n",
"\n",
"T = np.linalg.inv(R) @ np.linalg.inv(K) @ H @ K\n",
"T[:, :2, 2] *= focus_length\n",
"print(\"Truth T: \", T)\n",
"\n",
"print(\"Possible Solutions:\", inds)\n",
"print(R)\n",
"print(\"Translations:\", t * focus_length)\n",
"print(\"normals:\", n)\n",
"\n",
"# vm = VisualizationManager()\n",
"# vm.update_homography_grid(np.array(chunk2.image), H, 20)\n",
"\n",
"# print(H1)\n",
"# print(H2)\n",
"# print(\"Translation\", t)\n",
"print(Rotation.from_matrix(R).as_euler('XYZ', degrees=True))\n",
"\n",
"pts_src = pts_src.reshape((-1, 2))\n",
"pts_dst = pts_dst.reshape((-1, 2))\n",
"pts_src_3 = np.c_[np.array(pts_src), np.ones(np.array(pts_src).shape[0])]\n",
"pts_dst_3 = np.c_[np.array(pts_dst), np.ones(np.array(pts_dst).shape[0])]\n",
"Z = H2.copy()\n",
"# Z[:, 2] = [0, 0, 1]\n",
"# Z = np.linalg.inv(Z)\n",
"# Z[:, 2] = [0, 22, 1]\n",
"err = (pts_dst - (Z @ pts_src_3.T).T[:, :2]) ** 2\n",
"err.mean()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "573e9f36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([4, 1])"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.argpartition(np.array([4, 1, 2, 3, 0]), 2)[:2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af4a38dc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[[1., 0., 0., 0.],\n",
" [1., 0., 0., 0.]],\n",
"\n",
" [[0., 1., 0., 0.],\n",
" [0., 1., 0., 0.]]])"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 60,
"id": "bf6f822d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1., 0., 0.],\n",
" [0., 1., 0.],\n",
" [0., 0., 1.]])"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from numpy.linalg import inv\n",
"\n",
"inv(np.eye(3))"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "b35aab86",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.29428571428571426"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-103 / 350"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "8bf8c3e2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([155.93773004, 78.15418433, 2.49470517])"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A = np.array([[ 0.81113805 , 0.00010604, 126.46549906],\n",
" [ 0.00012962, 0.81093256 , 63.38304349],\n",
" [ 0.00086776, 0.00035579 , 2.02320589]])\n",
"\n",
"P = np.array([0, 0, 1]) / 0.811\n",
"A @ P"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1ced4915",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1,\n",
" (array([[ 1.00000000e+00, 5.42096647e-17, -3.33066907e-16],\n",
" [ 5.41763505e-17, 1.00000000e+00, -7.77156117e-16],\n",
" [-5.76312734e-18, 9.51009073e-17, 1.00000000e+00]]),),\n",
" (array([[0.],\n",
" [0.],\n",
" [0.]]),),\n",
" (array([[0.],\n",
" [0.],\n",
" [0.]]),))"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import cv2\n",
"import numpy as np\n",
"H = np.array([[ 1.00000000e+00, 1.49310572e-16, -1.25807618e-13],\n",
" [ 4.84132232e-17, 1.00000000e+00, -1.66095704e-13],\n",
" [-1.64660781e-20, 2.71716878e-19, 1.00000000e+00]])\n",
"cv2.decomposeHomographyMat(H, np.array([\n",
" [350, 0, 350],\n",
" [0, 350, 350],\n",
" [0, 0, 1]\n",
"]))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fad63f09",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.int64(0)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ss = cv2.decomposeHomographyMat(np.eye(3), np.eye(3))\n",
"G: np.ndarray = ((ss[1] - cur_geo.get_rotation_matrix()) ** 2)\n",
"G.mean((1, 2)).argmin()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6665f057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.141592653589793"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# ------------------------------------------------------------\n",
"\n",
"# E, mask = cv2.findEssentialMat(pts_src, pts_dst, K, method=cv2.RANSAC, prob=0.999, threshold=1.0)\n",
"\n",
"# # Извлечение поворота и трансляции\n",
"# _, R, t, mask = cv2.recoverPose(E, pts_src, pts_dst, K)\n",
"\n",
"# rotation_euler = Rotation.from_matrix(R).as_euler('ZYX', degrees=True)\n",
"# print(\"Rotation:\", rotation_euler)\n",
"\n",
"# vm = VisualizationManager()\n",
"# vm.update_homography_grid(np.array(chunk2.image), H, 20)\n",
"\n",
"# print(H1)\n",
"# print(H2)\n",
"# print(\"Translation\", t)\n",
"# Rotation.from_matrix(R).as_euler('ZYX', degrees=True)\n",
"\n",
"# ------------------------------------------------------------\n",
"\n",
"def estimate_rotation_from_homography(H, K, H_prev_estimate=None):\n",
" \"\"\"\n",
" Извлечение вращения из гомографии, игнорируя компоненту трансляции\n",
" \"\"\"\n",
" # Шаг 1: Преобразование в пространство нормализованных координат\n",
" K_inv = np.linalg.inv(K)\n",
" R_approx = K_inv @ H @ K\n",
" \n",
" # Шаг 2: SVD-ортогонализация для получения корректной матрицы вращения\n",
" U, S, Vt = np.linalg.svd(R_approx)\n",
" \n",
" # Проверка знака определителя\n",
" R = U @ Vt\n",
" if np.linalg.det(R) < 0:\n",
" # Инвертируем последний столбец U\n",
" U[:, -1] *= -1\n",
" R = U @ Vt\n",
" \n",
" return R\n",
"\n",
"# Использование:\n",
"# H, _ = cv2.findHomography(pts_src, pts_dst, cv2.RANSAC, 0.5, maxIters=2000)\n",
"# R_estimated = estimate_rotation_from_homography(H, K)\n",
"# rotation_euler = Rotation.from_matrix(R_estimated).as_euler('ZYX', degrees=True)\n",
"# print(\"Estimated rotation:\", rotation_euler)\n",
"\n",
"\n",
"\n",
"# im = bilinear_quad_to_square(np.array(chunk.image), [[0, 0], [100, 0], [100, 100], [0, 100]], 500, 500)\n",
"# cv2_to_pil(im)\n",
"\n",
"print(H1, H2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a38f9786",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.34560856 0.37318193 1. ]\n",
" [0.41831377 0.29715173 1. ]\n",
" [0.88572301 0.58160404 1. ]\n",
" [0.85899758 0.77806988 1. ]] [[-0.50714718 0.03888142 1. ]\n",
" [-0.49275814 0.1430908 1. ]\n",
" [-1.00607478 0.33253878 1. ]\n",
" [-1.14339579 0.18951426 1. ]]\n",
"[[-6.21408324e-01 -7.83486931e-01 7.05631752e-09]\n",
" [ 7.83486932e-01 -6.21408338e-01 -2.89870073e-09]\n",
" [ 8.26329402e-09 3.79110206e-08 1.00000000e+00]]\n",
"[[-0.62140831 -0.78348689 0. ]\n",
" [ 0.78348689 -0.62140831 0. ]\n",
" [ 0. 0. 1. ]]\n"
]
}
],
"source": [
"s = np.random.random((50, 3))\n",
"s[:, 2] = 1\n",
"yaw = np.degrees(3)\n",
"Z = np.array([\n",
" [np.cos(yaw), -np.sin(yaw), 0],\n",
" [np.sin(yaw), np.cos(yaw), 0],\n",
" [0, 0, 1],\n",
"])\n",
"p = (Z @ s.copy().T).T\n",
"print(s[:4], p[:4])\n",
"s = s[:, :2]\n",
"p = p[:, :2]\n",
"\n",
"print(cv2.findHomography(s, p)[0])\n",
"print(Z)"
]
},
{
"cell_type": "markdown",
"id": "4d13da7c",
"metadata": {},
"source": [
"## Откуда берётся смещение при повороте?\n",
"Ответ: ошибка содержится в определении местоположении ключевых точек"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55ba9c48",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import math\n",
"import cv2\n",
"\n",
"from scipy.spatial.transform import Rotation\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from PIL import Image\n",
"\n",
"from autopilot import AutoPilot\n",
"from vision_chunk import VisionChunk\n",
"from simulator import Simulator\n",
"from geolocation import Geolocation\n",
"from utility import cv2_to_pil\n",
"from visualization import VisualizationManager\n",
"\n",
"chunk = VisionChunk.load_image(Path('images') / 'photo_1.png')\n",
"image_width = chunk.image.size[0]\n",
"center = image_width / 2\n",
"\n",
"focus_length = 130\n",
"HEIGHT: float = 130\n",
"K = np.array([\n",
" [focus_length, 0, center],\n",
" [0, focus_length, center],\n",
" [0, 0, 1],\n",
"])\n",
"\n",
"\n",
"def bilinear_quad_to_square(img, pts_src, width, height):\n",
" \"\"\"\n",
" pts_src: массив 4 точек [[x0,y0], [x1,y1], [x2,y2], [x3,y3]]\n",
" в порядке: левый-верх, правый-верх, правый-низ, левый-низ\n",
" \"\"\"\n",
" pts = np.float32(pts_src)\n",
" \n",
" # Создаём сетку для результата\n",
" map_x = np.zeros((height, width), dtype=np.float32)\n",
" map_y = np.zeros((height, width), dtype=np.float32)\n",
"\n",
" for y in range(height):\n",
" v = y / (height - 1) if height > 1 else 0\n",
" for x in range(width):\n",
" u = x / (width - 1) if width > 1 else 0\n",
" \n",
" # Билинейная интерполяция углов четырёхугольника\n",
" px = (1 - u)*(1 - v)*pts[0,0] + u*(1 - v)*pts[1,0] + \\\n",
" (1 - u)*v*pts[3,0] + u*v*pts[2,0]\n",
" py = (1 - u)*(1 - v)*pts[0,1] + u*(1 - v)*pts[1,1] + \\\n",
" (1 - u)*v*pts[3,1] + u*v*pts[2,1]\n",
" \n",
" map_x[y, x] = px\n",
" map_y[y, x] = py\n",
"\n",
" # Применяем remap с билинейной интерполяцией цвета\n",
" result = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR)\n",
" return result\n",
"\n",
"def get_image(chunk: VisionChunk, geo: Geolocation) -> tuple[Image.Image, np.ndarray]:\n",
" R = np.array(geo.get_rotation_matrix())\n",
"\n",
" T = np.array([\n",
" [1, 0, -math.tan(geo.pitch)],\n",
" [0, 1, -math.tan(geo.roll)],\n",
" [0, 0, 1],\n",
" ])\n",
" \n",
" T = np.array([\n",
" [1, 0, 0],\n",
" [0, 1, 0],\n",
" [0, 0, 1],\n",
" ])\n",
"\n",
"\n",
" # R[:, 2] = [0, 0, 1]\n",
" H = K @ T @ R @ np.linalg.inv(K)\n",
" # print(\"R:\", R)\n",
" # print(\"H:\", H)\n",
" # print(H @ np.array([[center, center, 1]]).T)\n",
" # print(center)\n",
"\n",
" corners = np.array([\n",
" [center - focus_length, center - focus_length, 1],\n",
" [center - focus_length, center + focus_length, 1],\n",
" [center + focus_length, center + focus_length, 1],\n",
" [center + focus_length, center - focus_length, 1],\n",
" ])\n",
"\n",
" corners = ((H) @ corners.T).T\n",
" corners[:, 0] /= corners[:, 2]\n",
" corners[:, 1] /= corners[:, 2]\n",
" corners = corners[:, :2]\n",
" # print(corners)\n",
" # plt.imshow(chunk.image)\n",
" # points = corners.tolist() + [corners[0]]\n",
" # plt.plot(*zip(*points), color='red')\n",
" # plt.show()\n",
"\n",
" width = focus_length * 2\n",
" pts_src = corners.astype('float32')\n",
" pts_dst = np.float32([\n",
" [0, 0],\n",
" [width - 1, 0],\n",
" [width - 1, width - 1],\n",
" [0, width - 1],\n",
" ])\n",
"\n",
" # print(pts_src, pts_dst)\n",
" M = cv2.getPerspectiveTransform(pts_src, pts_dst)\n",
" warped = cv2.warpPerspective(np.array(chunk.image), M, (width, width), flags=cv2.INTER_LINEAR)\n",
" # warped = cv2.warpAffine(np.array(chunk.image), H, (width, width), flags=cv2.INTER_LINEAR)\n",
" # warped = bilinear_quad_to_square(np.array(chunk.image), pts_src, width, width)\n",
" # plt.imshow(warped)\n",
" # plt.show()\n",
" return cv2_to_pil(warped), H\n",
"\n",
"\n",
"prev_geo = Geolocation(0, 0, 1, math.radians(0), math.radians(0), math.radians(0))\n",
"cur_geo = Geolocation(0, 0, 1, math.radians(10), math.radians(0), math.radians(0))\n",
"image1, H1 = get_image(chunk, prev_geo)\n",
"image2, H2 = get_image(chunk, cur_geo)\n",
"\n",
"chunk1 = VisionChunk(image1, 'akaze')\n",
"chunk2 = VisionChunk(image2, 'akaze')\n",
"\n",
"# autopilot = AutoPilot()\n",
"# # pts_src, pts_dst = autopilot.calculate_optical_flow(chunk1, chunk2)\n",
"pts_src, pts_dst, *_ = chunk1.detect_and_match_keypoints(chunk2)\n",
"\n",
"\n",
"\n",
"# print(pts_src.shape)\n",
"H, _ = cv2.findHomography(pts_src, pts_dst, cv2.RANSAC, 0.5, maxIters=1000)\n",
"print(\"Homography:\", H)\n",
"n, R, t, *o = cv2.decomposeHomographyMat(H, K)\n",
"\n",
"# vm = VisualizationManager()\n",
"# vm.update_homography_grid(np.array(chunk2.image), H, 20)\n",
"\n",
"# print(H1)\n",
"# print(H2)\n",
"# print(\"Translation\", t)\n",
"# print(Rotation.from_matrix(R).as_euler('ZYX', degrees=True))\n",
"\n",
"\n",
"pts_src = pts_src.reshape((-1, 2))\n",
"pts_dst = pts_dst.reshape((-1, 2))\n",
"pts_src_3 = np.c_[np.array(pts_src), np.ones(np.array(pts_src).shape[0])]\n",
"pts_dst_3 = np.c_[np.array(pts_dst), np.ones(np.array(pts_dst).shape[0])]\n",
"Z = H2.copy()\n",
"Z[:, 2] = [0, 0, 1]\n",
"Z = np.linalg.inv(Z)\n",
"Z[:, 2] = [0, 22, 1]\n",
"err = (pts_dst - (Z @ pts_src_3.T).T[:, :2]) ** 2\n",
"err.mean()"
]
},
{
"cell_type": "code",
"execution_count": 223,
"id": "b3a594de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-2.041077998578922e-17, 0.3333333333333333], [6.123233995736766e-17, 1.0], [6.123233995736766e-17, -1.0], [-2.041077998578922e-17, -0.3333333333333333]]\n"
]
}
],
"source": [
"geo = Geolocation(0, 0, 1, *np.radians([0, 90, 0]))\n",
"R = geo.get_rotation_matrix()\n",
"\n",
"def norm(x: np.ndarray) -> np.ndarray:\n",
" return x / (x[2] + 2)\n",
"\n",
"def invr(p: np.ndarray, R: np.ndarray) -> np.ndarray:\n",
" return np.linalg.solve(np.array([\n",
" [R[0][0] - R[2][0] * p[0], R[0][1] - R[2][1] * p[0]],\n",
" [R[1][0] - R[2][0] * p[1], R[1][1] - R[2][1] * p[1]],\n",
" ]), np.array([\n",
" R[2][2] * p[0] - R[0][2],\n",
" R[2][2] * p[1] - R[1][2],\n",
" ]))\n",
"\n",
"# pt_src = np.array([-10000.99, 0, 1])\n",
"# pt_dst = norm(R @ pt_src)\n",
"pts_dst = np.array([\n",
" [-1, +1, 0],\n",
" [+1, +1, 0],\n",
" [+1, -1, 0],\n",
" [-1, -1, 0],\n",
"])\n",
"\n",
"# pts_src = [invr(p, R).tolist() for p in pts_dst]\n",
"pts_src = np.array([norm(p) for p in (R @ pts_dst.T).T])[:, :2].tolist()\n",
"print(pts_src)\n",
"\n",
"pts_draw = pts_src + pts_src[:1]\n",
"fig, ax = plt.subplots(1, 1)\n",
"# print(pts_src)\n",
"ax.plot(*zip(*pts_draw))\n",
"ax.set_xlim(-5, 5)\n",
"ax.set_ylim(-5, 5)\n",
"ax.set_aspect('equal')\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "db21cca4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[[ 4, 6]],\n",
"\n",
" [[ 6, 8]],\n",
"\n",
" [[ 8, 10]]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"np.array([\n",
" [[1, 2]],\n",
" [[3, 4]],\n",
" [[5, 6]],\n",
"]) + np.array([3, 4])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea8f82bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.75189338 -0.27456284 352.22526201]\n",
" [ 0.06748123 0.78125365 48.06952898]\n",
" [ -0.00022715 -0.00024594 1. ]] [[350. 0. 350.]\n",
" [ 0. 350. 350.]\n",
" [ 0. 0. 1.]]\n",
"Ts: [[[ 0.82623933 -0.02899366 -0.05676676]\n",
" [-0.02881035 0.87503077 -0.02628644]\n",
" [ 0.32663485 0.15221405 1.18647679]]\n",
"\n",
" [[ 0.82623933 -0.02899366 -0.05676676]\n",
" [-0.02881035 0.87503077 -0.02628644]\n",
" [ 0.32663485 0.15221405 1.18647679]]\n",
"\n",
" [[ 0.88892726 0.00040257 0.38367359]\n",
" [ 0.00021926 0.88864413 0.17873312]\n",
" [ 0.00027199 0.00023264 1.11017551]]\n",
"\n",
" [[ 0.88892726 0.00040257 0.38367359]\n",
" [ 0.00021926 0.88864413 0.17873312]\n",
" [ 0.00027199 0.00023264 1.11017551]]]\n",
"T: [[0.88892726 0.00040257 0.38367359]\n",
" [0.00021926 0.88864413 0.17873312]\n",
" [0.00027199 0.00023264 1.11017551]]\n",
"ts: [[[ 0.10494197]\n",
" [ 0.08302777]\n",
" [ 0.52089416]]\n",
"\n",
" [[-0.10494197]\n",
" [-0.08302777]\n",
" [-0.52089416]]\n",
"\n",
" [[ 0.41630374]\n",
" [ 0.25035158]\n",
" [ 0.2307649 ]]\n",
"\n",
" [[-0.41630374]\n",
" [-0.25035158]\n",
" [-0.2307649 ]]]\n",
"t: [0.41630374 0.25035158 0.2307649 ]\n",
"Position(x=0.42, y=0.25, z=1.23, yaw=5.0°, pitch=0.9°, roll=-2.9°)\n"
]
},
{
"data": {
"text/plain": [
"(np.float64(145.70630950200106),\n",
" np.float64(87.62305149881264),\n",
" np.float64(1.2307649007528827))"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pathlib import Path\n",
"import math\n",
"import cv2\n",
"\n",
"from scipy.spatial.transform import Rotation\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from PIL import Image\n",
"\n",
"from autopilot import AutoPilot\n",
"from vision_chunk import VisionChunk\n",
"from simulator import Simulator\n",
"from position import Position\n",
"from utility import cv2_to_pil\n",
"from visualization import VisualizationManager\n",
"\n",
"chunk = VisionChunk.load_image(Path('images') / 'photo_1.png')\n",
"image_width = chunk.image.size[0]\n",
"center = image_width / 2\n",
"\n",
"focus_length = image_width / 2\n",
"K = np.array([\n",
" [focus_length, 0, center],\n",
" [0, focus_length, center],\n",
" [0, 0, 1],\n",
"])\n",
"np.set_printoptions(suppress=True)\n",
"\n",
"def get_image(chunk: VisionChunk, geo: Position) -> tuple[Image.Image, np.ndarray]:\n",
" H = geo.get_homography_matrix(K)\n",
" warped = cv2.warpPerspective(np.array(chunk.image), H, (image_width, image_width), flags=cv2.INTER_LINEAR)\n",
" plt.imshow(warped)\n",
" plt.show()\n",
" return cv2_to_pil(warped), H\n",
"\n",
"\n",
"x = 150 / focus_length\n",
"y = 70 / focus_length\n",
"prev_geo = Position(y, -x, 1.30, math.radians(-5), math.radians(-4), math.radians(3))\n",
"cur_geo = Position(x, y, 1.25, math.radians(5), math.radians(1), math.radians(-3))\n",
"image1, H1 = get_image(chunk, prev_geo)\n",
"image2, H2 = get_image(chunk, cur_geo)\n",
"\n",
"chunk1 = VisionChunk(image1, 'brisk')\n",
"chunk2 = VisionChunk(image2, 'brisk')\n",
"\n",
"autopilot = AutoPilot()\n",
"# pts_src, pts_dst = autopilot.calculate_optical_flow(chunk1, chunk2)\n",
"pts_src, pts_dst, *_ = chunk1.detect_and_match_keypoints(chunk2)\n",
"\n",
"# print(pts_src.shape)\n",
"H, _ = cv2.findHomography(pts_src, pts_dst, cv2.RANSAC, 0.5, maxIters=1000)\n",
"# H = H2 @ np.linalg.inv(H1)\n",
"\n",
"prev_geo.iapply(H, K)\n",
"print(prev_geo)\n",
"prev_geo.x * focus_length, prev_geo.y * focus_length, prev_geo.z"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f9e16a1b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.int64(0)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array([\n",
" [[0], [1], [2]],\n",
" [[4], [2], [6]],\n",
" [[2], [3], [5]],\n",
" [[7], [1], [5]],\n",
"]).sum((1, 2)).argmin()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}