Compare commits

...

25 Commits

Author SHA1 Message Date
27d5c6eaab Merge pull request 'feat/pitch-roll' (#1) from feat/pitch-roll into main
Reviewed-on: #1
2026-02-20 16:50:15 +03:00
6040f3b253 feat: change git ignores, partial update todo.md 2026-02-20 16:48:19 +03:00
6d4208d100 feat: add SiaN model 2026-02-16 19:07:31 +03:00
zenloger
339e5f210c fix 2026-02-05 21:49:15 +03:00
zenloger
5385641d28 fix: use pixel_ratio in distance evaluation 2026-02-05 21:49:03 +03:00
64c9215f5b feat: add fps/landmark debugging 2026-01-12 15:47:03 +03:00
7cd700c1fa feat: min-ref-distance 2026-01-12 13:54:41 +03:00
05c249ed78 feat: add new arguments, correct yandex map 2026-01-12 13:31:37 +03:00
fbd0d01b35 feat: add constraints for landmark correction 2026-01-12 00:46:44 +03:00
e00878daad feat: convert from pixels to meters 2026-01-12 00:04:23 +03:00
ceca8a6e75 feat: dynamic map; refactor code and fix bugs 2026-01-11 23:45:19 +03:00
6456d18212 ref: move *.html to examples 2026-01-11 13:59:26 +03:00
3ee3599b87 feat: chunks from google 2026-01-11 13:54:37 +03:00
31c0f13361 ref(YandexMap): add initial_coordinates 2026-01-10 17:49:29 +03:00
c5ea48601e research(projection) 2026-01-10 17:48:54 +03:00
02981ca660 feat: custom background, add translation matrix 2026-01-10 17:48:37 +03:00
57f11685a0 fix: coordinates 2026-01-10 17:47:56 +03:00
17594bc8fc feat: add perspective transform 2026-01-10 17:47:32 +03:00
155bf17847 ref: add constants 2026-01-10 17:46:42 +03:00
1f1175a156 ref: geolocation -> position 2026-01-10 17:46:31 +03:00
69829d85fa chore: remove old sources 2026-01-10 17:44:56 +03:00
6d58b9f436 feat: add transform_app 2026-01-09 09:48:38 +03:00
ec81ce95a5 feat: add pitch, roll in Geolocation 2026-01-05 11:49:29 +05:00
063c61589a feat: edit simulator 2026-01-05 11:37:40 +05:00
e41d276676 feat: add simulations 2026-01-05 11:37:25 +05:00
44 changed files with 8316 additions and 2204 deletions

3
.gitignore vendored
View File

@@ -1,4 +1,5 @@
.venv
__pycache__
*.png
images
trajectories
z

View File

@@ -3,6 +3,7 @@ from pathlib import Path
import math
import random
import constants
import cv2
import numpy as np
from PIL import Image
@@ -11,7 +12,9 @@ from timer import Timer
from vision_chunk import VisionChunk
from visualization import VisualizationManager
from geolocation import Geolocation
from position import Position
from constants import CAMERA_HEIGHT
from utility import estimate_transformation_matrix
random.seed(1)
@@ -41,7 +44,7 @@ class PilotCommand:
class AutoPilot(Pilot):
# Состояние одометрии
geo: Geolocation
pos: Position
chunks: list[VisionChunk] # Ориентиры
target_idx: int # Текущий ориентир
@@ -51,17 +54,19 @@ class AutoPilot(Pilot):
prev_chunk: VisionChunk | None
frame_count: int
vis_manager: VisualizationManager # Менеджер визуализации (опционально)
# Положение на основе ориентира
reserved_geo: Geolocation | None
def __init__(self, points = [], chunks = [], viz_manager=None):
# Положение на основе ориентира
reserved_pos: Position | None
proccessing_time: float
def __init__(self, points = [], chunks = [], viz_manager=None, pixel_ratio: float = 1.):
self.prev_chunk = None
self.geo = Geolocation(0, 0, 1, 0)
self.pos = Position(0, 0, 1, 0, 0, 0)
self.chunks = chunks
self.frame_count = 0
self.vis_manager = viz_manager # Менеджер визуализации
self.reserved_geo = None
self.reserved_pos = None
self.pixel_ratio = pixel_ratio
# Пороговые значения качества сопоставления/гомографии
self.min_inliers: int = 12
@@ -74,9 +79,10 @@ class AutoPilot(Pilot):
self.target_idx = 0
self.timer = Timer()
self.chunk_points = np.array([[chunk.pos.x, chunk.pos.y] for chunk in self.chunks])
def get_position(self) -> tuple[float, float]:
return self.geo.x, self.geo.y
return self.pos.x, self.pos.y
def calculate_optical_flow(self, prev_chunk: VisionChunk, current_chunk: VisionChunk):
"""
@@ -87,10 +93,9 @@ class AutoPilot(Pilot):
current_gray = current_chunk.to_cv2_gray()
h, w = prev_gray.shape[:2]
cx, cy = w // 2, h // 2
# Создаем сетку точек для отслеживания (аналогично вашему step=20)
step = 35
step = 20
grid_points = []
for y in range(step, h - step, step):
for x in range(step, w - step, step):
@@ -120,136 +125,147 @@ class AutoPilot(Pilot):
return None, None
# Преобразуем в отцентрированные координаты (Y вверх)
src_pts = np.float32([[x - cx, cy - y] for x, y in good_old]).reshape(-1, 1, 2)
dst_pts = np.float32([[x - cx, cy - y] for x, y in good_new]).reshape(-1, 1, 2)
src_pts = np.float32([[x, y] for x, y in good_old]).reshape(-1, 1, 2)
dst_pts = np.float32([[x, y] for x, y in good_new]).reshape(-1, 1, 2)
return src_pts, dst_pts
def estimate_transformation_matrix(self, src_pts: np.ndarray, dst_pts: np.ndarray):
"""
Оценивает матрицу трансформации на основе сопоставленных точек
Точки уже отцентрированы относительно центра изображения
"""
# Используем RANSAC для оценки матрицы гомографии
print("Count of points: ", len(src_pts), len(dst_pts))
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0, maxIters=1000)
# RH, rmask = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 1.0)
if H is None:
return None
# Маска инлайеров
inliers = int(mask.sum()) if mask is not None else 0
total = int(mask.size) if mask is not None else 0
inlier_ratio = (inliers / total) if total > 0 else 0.0
# Ошибка репроекции по инлайерам (RMSE)
rmse = None
try:
# Проецируем src через H и сравниваем с dst
proj = cv2.perspectiveTransform(src_pts, H)
errs = np.linalg.norm((proj - dst_pts).reshape(-1, 2), axis=1)
if mask is not None and mask.shape[0] == errs.shape[0]:
errs = errs[mask.ravel().astype(bool)]
if errs.size > 0:
rmse = float(np.sqrt(np.mean(errs ** 2)))
except Exception:
rmse = None
# Извлекаем параметры трансформации из матрицы гомографии
# H = [a11 a12 tx]
# [a21 a22 ty]
# [0 0 1 ]
# Масштаб и поворот
a11, a12 = H[0, 0], H[0, 1]
a21, a22 = H[1, 0], H[1, 1]
# Смещение (уже отцентрировано)
tx, ty = -H[0, 2], -H[1, 2]
# print(" [Pilot] translate:", tx, ty)
# Вычисляем угол поворота
angle = -np.arctan2(a21, a11)
# Вычисляем масштаб
scale_x = np.sqrt(a11**2 + a21**2)
scale_y = np.sqrt(a12**2 + a22**2)
scale = (scale_x + scale_y) / 2
return {
'translation': (tx, ty), # Уже отцентрировано
'rotation': angle,
'scale': scale,
'homography': H,
'mask': mask,
'inliers': inliers,
'inliers_ratio': inlier_ratio,
'rmse': rmse
}
def update_drone_position(self, transformation_info: dict):
def update_drone_position(self, homography_matrix: np.ndarray):
"""
Обновляет позицию и угол БПЛА на основе трансформации изображения
Координаты уже отцентрированы относительно центра изображения
"""
homography = transformation_info['homography']
self.geo @= homography
if self.reserved_geo is not None:
self.reserved_geo @= homography
self.pos.iapply(homography_matrix)
def get_drone_state(self) -> dict:
"""
Возвращает текущее состояние БПЛА
"""
return {
'x': self.geo.x,
'y': self.geo.y,
'angle': self.geo.angle,
'angle_degrees': math.degrees(self.geo.angle),
'x': self.pos.x,
'y': self.pos.y,
'angle': self.pos.yaw,
'angle_degrees': math.degrees(self.pos.yaw),
'frame_count': self.frame_count
}
def get_position_by_chunk(self) -> Geolocation | None:
# Пытаемся найти ориентир на картинке:
def get_position_by_chunk(self) -> Position | None:
landmark_timer = Timer()
landmark_timer.start()
cur_pos = np.array([self.pos.x, self.pos.y])
closest_chunk_idx = ((self.chunk_points - cur_pos) ** 2).sum(1).argmin()
current_chunk = self.prev_chunk
landmark_chunk = self.chunks[self.target_idx]
landmark_chunk = self.chunks[closest_chunk_idx]
if constants.DEBUG_FPS:
print(f"[LANDMARK]: Closest chunk finding: {landmark_timer.loop() * 1000:.2f} ms")
# Краевой случай: отсутствие чанков
if current_chunk is None or landmark_chunk is None:
return None
landmark_timer.start()
src_pts, dst_pts, matches, kp1, kp2 = landmark_chunk.detect_and_match_keypoints(current_chunk)
if constants.DEBUG_FPS:
print(f"[LANDMARK]: detect and match keypoints: {landmark_timer.loop() * 1000:.2f} ms")
landmark_timer.stop()
# Визуализация (если нужна)
if src_pts is not None and dst_pts is not None and self.vis_manager:
was_enabled = self.timer.enabled
if was_enabled: self.timer.stop()
self.vis_manager.update_chunk_matches(landmark_chunk.to_numpy(), current_chunk.to_numpy(), kp1, kp2, matches)
if was_enabled: self.timer.start()
if was_enabled:
self.timer.stop()
self.vis_manager.update_chunk_matches(
landmark_chunk.to_numpy(),
current_chunk.to_numpy(),
kp1, kp2, matches
)
if was_enabled:
self.timer.start()
if src_pts is not None and dst_pts is not None:
# Оцениваем матрицу трансформации
landmark_transform = self.estimate_transformation_matrix(src_pts, dst_pts)
# Если ориентир достоверно найден — скорректируем глобальные координаты и угол
if landmark_transform is not None:
ok_scale = (self.min_scale <= landmark_transform['scale'] <= self.max_scale)
ok_inliers = (landmark_transform.get('inliers', 0) >= self.min_inliers)
ratio = landmark_transform.get('inliers_ratio', 0.0)
ok_ratio = (ratio >= self.min_inlier_ratio)
rmse = landmark_transform.get('rmse', None)
ok_rmse = (rmse is not None and rmse <= self.max_reproj_rmse)
if ok_scale and ok_inliers and ok_ratio and ok_rmse:
# print("[HELP]")
# print("Matrix", landmark_transform['homography'])
# print("Position", self.x, self.y)
# print("Position of point", self.points[self.target_idx])
# print("[PILOT]", rmse, ratio, ok_rmse)
# if False:
# Считаем абсолютную позу относительно координат ориентира
landmark_world_x, landmark_world_y = self.points[self.target_idx]
landmark = Geolocation(landmark_world_x, landmark_world_y, 1, 0)
homography = landmark_transform['homography']
# homography = np.linalg.inv(homography)
print(f" [Pilot] Landmark correction applied (inliers={landmark_transform['inliers']}, ratio={ratio:.2f}, rmse={rmse:.2f})")
return landmark @ homography
return None
landmark_timer.start()
# Краевой случай: нет точек или недостаточно матчей
if src_pts is None or dst_pts is None:
return None
num_matches = len(src_pts)
if num_matches < 20:
return None
# Оценка матрицы гомографии
landmark_timer.loop()
landmark_transform, mask = estimate_transformation_matrix(src_pts, dst_pts)
num_inliers = int(np.sum(mask))
if constants.DEBUG_FPS:
print(f"[LANDMARK]: matrix estimation: {landmark_timer.loop() * 1000:.2f} ms")
# Краевой случай: матрица не найдена
if landmark_transform is None or mask is None:
return None
# === КРИТЕРИИ ПРИНЯТИЯ РЕШЕНИЯ ===
# 1. Минимальное количество инлайеров (абсолютное)
MIN_INLIERS_ABSOLUTE = 6
if num_inliers < MIN_INLIERS_ABSOLUTE:
return None
# 2. Процент инлайеров от общего числа матчей
inlier_ratio = num_inliers / num_matches
if constants.DEBUG_LANDMARK:
print("[LANDMARK]: inlier_ratio=", inlier_ratio)
MIN_INLIER_RATIO = 0.6
if inlier_ratio < MIN_INLIER_RATIO:
return None
# 3. Проверка качества гомографии (детерминант для выявления вырожденных случаев)
det = np.linalg.det(landmark_transform[:2, :2])
# Детерминант должен быть близок к 1 (без сильного масштабирования)
if abs(det) < 0.1 or abs(det) > 10.0:
return None
# 4. Проверка на валидность перспективного преобразования
# Элементы третьей строки не должны быть слишком большими
if abs(landmark_transform[2, 0]) > 0.01 or abs(landmark_transform[2, 1]) > 0.01:
return None
# 5. Дополнительная проверка: средняя ошибка репроекции для инлайеров
inlier_src = src_pts[mask.ravel() == 1]
inlier_dst = dst_pts[mask.ravel() == 1]
# Преобразуем точки через найденную гомографию
transformed_pts = cv2.perspectiveTransform(inlier_src, landmark_transform)
# Вычисляем ошибку репроекции
reprojection_errors = np.sqrt(np.sum((transformed_pts - inlier_dst) ** 2, axis=2))
mean_error = np.mean(reprojection_errors)
MAX_MEAN_REPROJECTION_ERROR = 1.1 # пиксели
if constants.DEBUG_LANDMARK:
print("[LANDMARK]: Mean_error=", mean_error)
if mean_error > MAX_MEAN_REPROJECTION_ERROR:
return None
# 6. Проверка стабильности: если слишком много хороших совпадений, но мало инлайеров - подозрительно
if num_matches > 50 and inlier_ratio < 0.15:
return None
# === ВСЕ ПРОВЕРКИ ПРОЙДЕНЫ ===
print("[LANDMARK]: Correction Applied")
if constants.DEBUG_FPS:
print(f"[LANDMARK]: time: {landmark_timer.get_elapsed() * 1000:.2f} ms")
return landmark_chunk.pos.apply(landmark_transform)
def handle(self, current_chunk: VisionChunk) -> PilotCommand:
@@ -260,58 +276,57 @@ class AutoPilot(Pilot):
self.prev_chunk = current_chunk
self.timer.stop()
return PilotCommand(processing_time=self.timer.get_elapsed())
# Расстояние до цели
distance_to_target = math.sqrt(
(self.points[self.target_idx][0] - self.geo.x) ** 2 +
(self.points[self.target_idx][1] - self.geo.y) ** 2
)
# Вычисляем оптический поток для покадрового сравнения
optical_flow_timer = Timer()
optical_flow_timer.start()
matching_timer = Timer()
matching_timer.start()
src_pts, dst_pts = self.calculate_optical_flow(self.prev_chunk, current_chunk)
optical_flow_timer.stop()
print(f"Optical flow calculating: {optical_flow_timer.get_elapsed() * 1000:.2f} ms")
# src_pts, dst_pts, _, _, _ = self.prev_chunk.detect_and_match_keypoints(current_chunk)
matching_timer.stop()
print(f"Matching calculating: {matching_timer.get_elapsed() * 1000:.2f} ms")
# Оцениваем смещение БПЛА
if src_pts is not None and dst_pts is not None:
# Оцениваем матрицу трансформации
matrix_estimation_timer = Timer()
matrix_estimation_timer.start()
transformation_info = self.estimate_transformation_matrix(src_pts, dst_pts)
homography_matrix, _ = estimate_transformation_matrix(src_pts, dst_pts)
matrix_estimation_timer.stop()
print(f"Transformation matrix estimating: {matrix_estimation_timer.get_elapsed() * 1000:.2f} ms")
if transformation_info:
# Обновляем позицию и угол БПЛА (одометрия по кадрам)
self.update_drone_position(transformation_info)
print(f"Transformation matrix updating: {matrix_estimation_timer.get_elapsed() * 1000:.2f} ms")
if homography_matrix is not None:
self.update_drone_position(homography_matrix)
self.timer.stop()
# Обновляем визуализацию
if self.vis_manager:
# Используем визуализацию движения точек по сетке
homography_matrix = transformation_info['homography']
self.vis_manager.update_homography_grid(
current_chunk.to_numpy(),
homography_matrix,
grid_step=85)
current_chunk.to_numpy(),
homography_matrix,
grid_step=35)
self.timer.start()
chunk_timer = Timer()
chunk_timer.start()
# Пытаемся найти ориентир на картинке:
self.prev_chunk = current_chunk
# npos = self.get_position_by_chunk()
# if npos is not None:
# self.reserved_geo = npos
# Для улучшения среднего FPS
if self.frame_count % 5 == 0:
pos_by_chunk = self.get_position_by_chunk()
if pos_by_chunk is not None:
self.pos = pos_by_chunk
chunk_timer.stop()
print(f"Chunk timer: {chunk_timer.get_elapsed() * 1000:.2f} ms")
command = self.make_command()
self.timer.reset()
return command
def make_command(self) -> PilotCommand:
# Расстояние до цели
distance_to_target = math.sqrt(
(self.points[self.target_idx][0] - self.pos.x) ** 2 +
(self.points[self.target_idx][1] - self.pos.y) ** 2
) * self.pixel_ratio
if distance_to_target < 35:
self.target_idx += 1
@@ -320,20 +335,36 @@ class AutoPilot(Pilot):
return PilotCommand(stop=True)
distance_to_target = math.sqrt(
(self.points[self.target_idx][0] - self.geo.x) ** 2 +
(self.points[self.target_idx][1] - self.geo.y) ** 2
(self.points[self.target_idx][0] - self.pos.x) ** 2 +
(self.points[self.target_idx][1] - self.pos.y) ** 2
)
if self.reserved_geo is not None:
self.geo = self.reserved_geo
self.reserved_geo = None
angle_trajectory = self.pos.yaw + math.pi / 2
# Проверка на слепую зону
R = 120
blind = np.array([
[
self.pos.x * self.pixel_ratio + R * np.cos(angle_trajectory - np.pi / 2),
self.pos.y * self.pixel_ratio + R * np.sin(angle_trajectory - np.pi / 2),
],
[
self.pos.x * self.pixel_ratio + R * np.cos(angle_trajectory + np.pi / 2),
self.pos.y * self.pixel_ratio + R * np.sin(angle_trajectory + np.pi / 2),
]
])
blind -= self.points[self.target_idx] * self.pixel_ratio
blind = np.hypot(blind[:, 0], blind[:, 1])
print("R: ", blind)
if np.min(blind) < R:
return PilotCommand(0, 10, False, self.timer.get_elapsed())
# Вычисляем угол к цели
target_angle = math.atan2(self.points[self.target_idx][1] - self.geo.y, self.points[self.target_idx][0] - self.geo.x)
angle_trajectory = self.geo.angle + math.pi / 2
# print("[ANGLE]", angle_trajectory, "->", target_angle)
target_angle = math.atan2(self.points[self.target_idx][1] - self.pos.y, self.points[self.target_idx][0] - self.pos.x)
# Вычисляем разность углов (направление поворота)
angle_diff = target_angle - angle_trajectory
@@ -343,16 +374,15 @@ class AutoPilot(Pilot):
if angle_diff >= math.pi:
angle_diff -= 2 * math.pi
d_r = max(10, min(35., distance_to_target / 2))
d_a_limit = d_r / 10 * 0.01
d_r = max(5, min(10., distance_to_target / 2))
d_a_limit = np.radians(5)
command = PilotCommand(
max(min(d_a_limit, angle_diff), -d_a_limit),
d_r, False, self.timer.get_elapsed()
)
self.timer.reset()
return command
def reset_position(self, x: float = 0.0, y: float = 0.0, angle: float = 0.0):
"""Сбрасывает позицию и угол БПЛА"""
self.geo = Geolocation(x, y, 1, angle)
self.pos = Position(x, y, 1, angle)

35
constants.py Normal file
View File

@@ -0,0 +1,35 @@
import numpy as np
# Ширина 1 пикселя в метрах
YANDEX_PIXEL_RATIO = {
15: 2830 / 1049,
18: 350 / 1049,
}
GOOGLE_PIXEL_RATIO = {
15: 2766 / 1031,
18: 346 / 1031,
}
WINDOW_HEIGHT = 1031
# Ширина и высота снимка в пикселях
CHUNK_WIDTH = 700
# Высота, с которой произведен снимок (в метрах)
CAMERA_HEIGHT = 150
# Угол обзора
CAMERA_FOV = np.radians(90)
# Матрица параметров камеры
_K_FOCUS_DISTANCE = np.tan(CAMERA_FOV / 2) * CHUNK_WIDTH / 2
_K_CENTER = CHUNK_WIDTH / 2
K = np.array([
[_K_FOCUS_DISTANCE, 0, _K_CENTER],
[0, _K_FOCUS_DISTANCE, _K_CENTER],
[0, 0, 1]
])
DEBUG_FPS: bool = False
DEBUG_LANDMARK: bool = False

2
datasets/ya_go_maps/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
images/homography_cache
*.zip

View File

@@ -0,0 +1,43 @@
import numpy as np
from pathlib import Path
from ...google_map import GoogleMap
from ...simulator import Simulator
from ...yandex_map import YandexMap
LAT_MIN, LAT_MAX = 44.960236, 54.967830
LON_MIN, LON_MAX = 53.084167, 58.677977
def create_new_asset(yandex_map, google_map):
folder = Path('dataset_ya_go_maps')
id = 0
print(id)
while (folder / f"{id:0{4}}_google.png").exists():
id += 1
google_file = folder / f"{id:0{4}}_google.png"
yandex_file = folder / f"{id:0{4}}_yandex.png"
lat = np.random.rand() * (LAT_MAX - LAT_MIN) + LAT_MIN
lon = np.random.rand() * (LON_MAX - LON_MIN) + LON_MIN
yandex_map.open(lat, lon, 18)
google_map.open(lat, lon, 18)
simulator = Simulator()
simulator._apply_perspective_transform(yandex_map.make_screenshot()).save(yandex_file)
simulator._apply_perspective_transform(google_map.make_screenshot()).save(google_file)
def main():
folder = Path('dataset_ya_go_maps')
if not folder.exists():
folder.mkdir()
yandex_map = YandexMap(initial_zoom=15)
google_map = GoogleMap(initial_zoom=15)
for i in range(4):
create_new_asset(yandex_map, google_map)
if __name__ == '__main__':
main()

330
examples/page.html Normal file
View File

@@ -0,0 +1,330 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Симулятор проекции дрона</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: #f5f5f5;
}
.container {
display: flex;
gap: 30px;
align-items: flex-start;
}
.canvas-wrapper {
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
canvas {
border: 2px solid #333;
display: block;
}
.controls {
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
min-width: 300px;
}
.control-group {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
color: #333;
}
input[type="range"] {
width: 100%;
margin-bottom: 5px;
}
.value-display {
text-align: right;
color: #666;
font-size: 14px;
}
h1 {
text-align: center;
color: #333;
}
.info {
background: #e3f2fd;
padding: 10px;
border-radius: 4px;
margin-top: 20px;
font-size: 14px;
}
</style>
</head>
<body>
<h1>Симулятор проекции камеры дрона</h1>
<div class="container">
<div class="canvas-wrapper">
<canvas id="canvas" width="720" height="720"></canvas>
</div>
<div class="controls">
<div class="control-group">
<label for="yaw">Поворот (Yaw):</label>
<input type="range" id="yaw" min="-180" max="180" value="0" step="1">
<div class="value-display"><span id="yaw-value">0</span>°</div>
</div>
<div class="control-group">
<label for="pitch">Тангаж (Pitch):</label>
<input type="range" id="pitch" min="-90" max="90" value="0" step="0.1">
<div class="value-display"><span id="pitch-value">0</span>°</div>
</div>
<div class="control-group">
<label for="roll">Крен (Roll):</label>
<input type="range" id="roll" min="-90" max="90" value="0" step="0.1">
<div class="value-display"><span id="roll-value">0</span>°</div>
</div>
<div class="control-group">
<label for="height">Высота (масштаб):</label>
<input type="range" id="height" min="0.5" max="1.5" value="1" step="0.01">
<div class="value-display"><span id="height-value">1.00</span></div>
</div>
<div class="info">
<strong>Координаты углов:</strong>
<div id="coordinates" style="margin-top: 10px; font-family: monospace; font-size: 12px;"></div>
</div>
</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjs/14.8.1/math.js"></script>
<script>
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d');
// Элементы управления
const yawSlider = document.getElementById('yaw');
const pitchSlider = document.getElementById('pitch');
const rollSlider = document.getElementById('roll');
const heightSlider = document.getElementById('height');
const yawValue = document.getElementById('yaw-value');
const pitchValue = document.getElementById('pitch-value');
const rollValue = document.getElementById('roll-value');
const heightValue = document.getElementById('height-value');
const coordinatesDiv = document.getElementById('coordinates');
// Исходные углы изображения 1000x1000
const sourceCorners = [
[-500, -500, 1], // Верхний левый
[500, -500, 1], // Верхний правый
[500, 500, 1], // Нижний правый
[-500, 500, 1] // Нижний левый
];
// Функция умножения матриц
function multiplyMatrixVector(matrix, vector) {
return [
matrix[0][0] * vector[0] + matrix[0][1] * vector[1] + matrix[0][2] * vector[2],
matrix[1][0] * vector[0] + matrix[1][1] * vector[1] + matrix[1][2] * vector[2],
matrix[2][0] * vector[0] + matrix[2][1] * vector[1] + matrix[2][2] * vector[2]
];
}
// Создание матрицы вращения вокруг X (крен)
function rotationX(angle) {
const rad = angle * Math.PI / 180;
const c = Math.cos(rad);
const s = Math.sin(rad);
return [
[1, 0, 0],
[0, c, -s],
[0, s, c]
];
}
// Создание матрицы вращения вокруг Y (тангаж)
function rotationY(angle) {
const rad = angle * Math.PI / 180;
const c = Math.cos(rad);
const s = Math.sin(rad);
return [
[c, 0, s],
[0, 1, 0],
[-s, 0, c]
];
}
// Создание матрицы вращения вокруг Z (поворот)
function rotationZ(angle) {
const rad = angle * Math.PI / 180;
const c = Math.cos(rad);
const s = Math.sin(rad);
return [
[c, -s, 0],
[s, c, 0],
[0, 0, 1]
];
}
// Умножение двух матриц 3x3
function multiplyMatrices(a, b) {
const result = [[0,0,0], [0,0,0], [0,0,0]];
for (let i = 0; i < 3; i++) {
for (let j = 0; j < 3; j++) {
for (let k = 0; k < 3; k++) {
result[i][j] += a[i][k] * b[k][j];
}
}
}
return result;
}
function inv(a) {
return math.inv(math.matrix(a))._data;
}
// Проекция 3D точки на 2D плоскость
function project3Dto2D(point, height) {
const focalLength = 360;
const distance = 1000 * height;
const z = point[2] + distance;
// if (z <= 0) z = 0.01; // Избегаем деления на ноль
const x = 360 + (focalLength * point[0]) / z;
const y = 360 + (focalLength * point[1]) / z;
return [x, y];
}
// Главная функция отрисовки
function render() {
const yaw = parseFloat(yawSlider.value);
const pitch = parseFloat(pitchSlider.value);
const roll = parseFloat(rollSlider.value);
const height = parseFloat(heightSlider.value);
// Обновление отображаемых значений
yawValue.textContent = yaw;
pitchValue.textContent = pitch.toFixed(1);
rollValue.textContent = roll.toFixed(1);
heightValue.textContent = height.toFixed(2);
// Создание комбинированной матрицы вращения
const Rz = rotationZ(yaw);
const Ry = rotationY(pitch);
const Rx = rotationX(roll);
// R = Rz * Ry * Rx
const R_temp = multiplyMatrices(Rz, Ry);
const _R = multiplyMatrices(R_temp, Rx);
// _R[0][2] = _R[1][2] = 0;
// _R[2][2] = 1;
const R = _R;
console.log(_R, R)
const K = [
[375, 0, 375],
[0, 375, 375],
[0, 0, 1]
];
const T = [
[0.5, 0, 375],
[0, 0.5, 375],
[0, 0, 1],
]
const H = multiplyMatrices(K, multiplyMatrices(R, inv(K)));
const Z = multiplyMatrices(H, T)
// Трансформация углов
const transformedCorners = sourceCorners.map(corner => {
const rotated = multiplyMatrixVector(Z, corner);
const scaled = [
rotated[0],
rotated[1],
rotated[2]
];
console.log(K, R, H, rotated);
return [rotated[0] / rotated[2], rotated[1] / rotated[2]];
// return project3Dto2D(scaled, height);
});
// Очистка canvas
ctx.clearRect(0, 0, canvas.width, canvas.height);
// Рисование сетки для справки
ctx.strokeStyle = '#e0e0e0';
ctx.lineWidth = 1;
for (let i = 0; i <= 720; i += 90) {
ctx.beginPath();
ctx.moveTo(i, 0);
ctx.lineTo(i, 720);
ctx.stroke();
ctx.beginPath();
ctx.moveTo(0, i);
ctx.lineTo(720, i);
ctx.stroke();
}
// Рисование центральной точки
ctx.fillStyle = '#ff0000';
ctx.beginPath();
ctx.arc(360, 360, 5, 0, Math.PI * 2);
ctx.fill();
// Рисование четырёхугольника
ctx.strokeStyle = '#0066cc';
ctx.fillStyle = 'rgba(0, 102, 204, 0.2)';
ctx.lineWidth = 3;
ctx.beginPath();
ctx.moveTo(transformedCorners[0][0], transformedCorners[0][1]);
for (let i = 1; i < transformedCorners.length; i++) {
ctx.lineTo(transformedCorners[i][0], transformedCorners[i][1]);
}
ctx.closePath();
ctx.fill();
ctx.stroke();
// Рисование углов
ctx.fillStyle = '#ff6600';
transformedCorners.forEach((corner, i) => {
ctx.beginPath();
ctx.arc(corner[0], corner[1], 6, 0, Math.PI * 2);
ctx.fill();
// Подписи углов
ctx.fillStyle = '#333';
ctx.font = '12px Arial';
ctx.fillText(`${i + 1}`, corner[0] + 10, corner[1] - 10);
ctx.fillStyle = '#ff6600';
});
// Вывод координат
let coordText = '';
transformedCorners.forEach((corner, i) => {
coordText += `Угол ${i + 1}: (${corner[0].toFixed(1)}, ${corner[1].toFixed(1)})<br>`;
});
coordinatesDiv.innerHTML = coordText;
}
// Обработчики событий
yawSlider.addEventListener('input', render);
pitchSlider.addEventListener('input', render);
rollSlider.addEventListener('input', render);
heightSlider.addEventListener('input', render);
// Начальная отрисовка
render();
</script>
</body>
</html>

249
examples/page2.html Normal file
View File

@@ -0,0 +1,249 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Интерактивная система координат с трансформацией</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 50px auto;
padding: 20px;
}
canvas {
border: 1px solid #ccc;
display: block;
margin: 20px auto;
background: #f9f9f9;
}
.controls {
display: grid;
grid-template-columns: 150px 1fr 80px;
gap: 15px;
margin-bottom: 20px;
align-items: center;
}
input[type="range"] {
width: 100%;
}
label {
font-weight: bold;
}
.value {
text-align: center;
font-family: monospace;
}
</style>
</head>
<body>
<h1>Система координат с матрицей трансформации 3×3</h1>
<div class="controls">
<label for="scale">Масштаб:</label>
<input type="range" id="scale" min="0.5" max="3" step="0.1" value="1">
<span class="value" id="scaleValue">1.0</span>
<label for="projX">Проекция X (px):</label>
<input type="range" id="projX" min="-1.0" max="1.0" step="0.01" value="0">
<span class="value" id="projXValue">0.00</span>
<label for="projY">Проекция Y (py):</label>
<input type="range" id="projY" min="-1.0" max="1.0" step="0.01" value="0">
<span class="value" id="projYValue">0.00</span>
<label for="rotation">Поворот:</label>
<input type="range" id="rotation" min="0" max="360" step="1" value="0">
<span class="value" id="rotationValue"></span>
</div>
<canvas id="canvas" width="600" height="600"></canvas>
<script>
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d');
// Исходные точки квадрата
const originalPoints = [
[-50, -50, 1],
[-25, -50, 1],
[0, -50, 1],
[25, -50, 1],
[50, -50, 1],
[50, 50, 1],
[-50, 50, 1]
];
// Элементы управления
const scaleInput = document.getElementById('scale');
const projXInput = document.getElementById('projX');
const projYInput = document.getElementById('projY');
const rotationInput = document.getElementById('rotation');
// Отображение значений
const scaleValue = document.getElementById('scaleValue');
const projXValue = document.getElementById('projXValue');
const projYValue = document.getElementById('projYValue');
const rotationValue = document.getElementById('rotationValue');
// Умножение матриц 3x3
function multiplyMatrices(a, b) {
const result = [
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]
];
for (let i = 0; i < 3; i++) {
for (let j = 0; j < 3; j++) {
for (let k = 0; k < 3; k++) {
result[i][j] += a[i][k] * b[k][j];
}
}
}
return result;
}
// Применение матрицы к точке
function applyMatrix(matrix, point) {
const x = matrix[0][0] * point[0] + matrix[0][1] * point[1] + matrix[0][2] * point[2];
const y = matrix[1][0] * point[0] + matrix[1][1] * point[1] + matrix[1][2] * point[2];
const w = matrix[2][0] * point[0] + matrix[2][1] * point[1] + matrix[2][2] * point[2];
return [x / w, y / w, 1];
}
// Создание матрицы масштабирования
function getScaleMatrix(s) {
return [
[s, 0, 0],
[0, s, 0],
[0, 0, 1]
];
}
// Создание матрицы поворота
function getRotationMatrix(angle) {
const rad = angle * Math.PI / 180;
const cos = Math.cos(rad);
const sin = Math.sin(rad);
return [
[cos, -sin, 0],
[sin, cos, 0],
[0, 0, 1]
];
}
// Создание матрицы проекции
function getProjectionMatrix(px, py) {
return [
[1, 0, 0],
[0, 1, 0],
[px, py, 1]
];
}
// Рисование сетки координат
function drawGrid() {
ctx.strokeStyle = '#ddd';
ctx.lineWidth = 1;
// Вертикальные линии
for (let x = 0; x <= canvas.width; x += 50) {
ctx.beginPath();
ctx.moveTo(x, 0);
ctx.lineTo(x, canvas.height);
ctx.stroke();
}
// Горизонтальные линии
for (let y = 0; y <= canvas.height; y += 50) {
ctx.beginPath();
ctx.moveTo(0, y);
ctx.lineTo(canvas.width, y);
ctx.stroke();
}
// Оси координат
ctx.strokeStyle = '#999';
ctx.lineWidth = 2;
ctx.beginPath();
ctx.moveTo(canvas.width / 2, 0);
ctx.lineTo(canvas.width / 2, canvas.height);
ctx.moveTo(0, canvas.height / 2);
ctx.lineTo(canvas.width, canvas.height / 2);
ctx.stroke();
}
// Рисование четырехугольника
function drawQuadrilateral(points) {
const centerX = canvas.width / 2;
const centerY = canvas.height / 2;
// Рисуем четырехугольник
ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
ctx.strokeStyle = '#2c5aa0';
ctx.lineWidth = 2;
ctx.beginPath();
ctx.moveTo(centerX + points[0][0], centerY - points[0][1]);
for (let i = 1; i < points.length; i++) {
ctx.lineTo(centerX + points[i][0], centerY - points[i][1]);
}
ctx.closePath();
ctx.fill();
ctx.stroke();
// Рисуем точки
ctx.fillStyle = '#e53e3e';
points.forEach(point => {
ctx.beginPath();
ctx.arc(centerX + point[0], centerY - point[1], 5, 0, Math.PI * 2);
ctx.fill();
});
}
// Обновление сцены
function update() {
const scale = parseFloat(scaleInput.value);
const projX = parseFloat(projXInput.value) / 50;
const projY = parseFloat(projYInput.value) / 50;
const rotation = parseFloat(rotationInput.value);
// Обновление отображаемых значений
scaleValue.textContent = scale.toFixed(1);
projXValue.textContent = projX.toFixed(2);
projYValue.textContent = projY.toFixed(2);
rotationValue.textContent = rotation + '°';
// Создание матриц трансформации
const scaleMatrix = getScaleMatrix(scale);
const rotationMatrix = getRotationMatrix(rotation);
const projectionMatrix = getProjectionMatrix(projX, projY);
// Комбинированная матрица: проекция * поворот * масштаб
let transformMatrix = multiplyMatrices(scaleMatrix, rotationMatrix);
transformMatrix = multiplyMatrices(transformMatrix, projectionMatrix);
// Применение трансформации к точкам
const transformedPoints = originalPoints.map(point =>
applyMatrix(transformMatrix, point)
);
// Отрисовка
ctx.clearRect(0, 0, canvas.width, canvas.height);
drawGrid();
drawQuadrilateral(transformedPoints);
}
// Обработчики событий
scaleInput.addEventListener('input', update);
projXInput.addEventListener('input', update);
projYInput.addEventListener('input', update);
rotationInput.addEventListener('input', update);
// Начальная отрисовка
update();
</script>
</body>
</html>

167
examples/transform_app.py Normal file
View File

@@ -0,0 +1,167 @@
from pathlib import Path
from kivy.app import App
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.slider import Slider
from kivy.uix.label import Label
from kivy.uix.image import Image as KivyImage
from kivy.graphics.texture import Texture
from kivy.core.window import Window
import numpy as np
from PIL import Image
import cv2
import io
class CameraTransformApp(App):
def build(self):
Window.size = (900, 700)
# Главный контейнер
main_layout = BoxLayout(orientation='vertical', padding=10, spacing=10)
# Контейнер для изображения
self.image_widget = KivyImage(size_hint=(1, 0.7))
main_layout.add_widget(self.image_widget)
# Контейнер для ползунков
controls_layout = BoxLayout(orientation='vertical', size_hint=(1, 0.3), spacing=5)
# Yaw (рыскание) - вращение вокруг вертикальной оси
yaw_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
yaw_layout.add_widget(Label(text='Рыскание (Yaw):', size_hint=(0.2, 1)))
self.yaw_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
self.yaw_slider.bind(value=self.on_slider_change)
self.yaw_label = Label(text='', size_hint=(0.2, 1))
yaw_layout.add_widget(self.yaw_slider)
yaw_layout.add_widget(self.yaw_label)
controls_layout.add_widget(yaw_layout)
# Pitch (тангаж) - наклон вперед/назад
pitch_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
pitch_layout.add_widget(Label(text='Тангаж (Pitch):', size_hint=(0.2, 1)))
self.pitch_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
self.pitch_slider.bind(value=self.on_slider_change)
self.pitch_label = Label(text='', size_hint=(0.2, 1))
pitch_layout.add_widget(self.pitch_slider)
pitch_layout.add_widget(self.pitch_label)
controls_layout.add_widget(pitch_layout)
# Roll (крен) - наклон влево/вправо
roll_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
roll_layout.add_widget(Label(text='Крен (Roll):', size_hint=(0.2, 1)))
self.roll_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
self.roll_slider.bind(value=self.on_slider_change)
self.roll_label = Label(text='', size_hint=(0.2, 1))
roll_layout.add_widget(self.roll_slider)
roll_layout.add_widget(self.roll_label)
controls_layout.add_widget(roll_layout)
main_layout.add_widget(controls_layout)
# Создаем тестовое изображение с сеткой
self.original_image = self.create_test_image()
self.update_image()
return main_layout
def create_test_image(self):
"""Создает тестовое изображение с сеткой и текстом"""
img = cv2.imread(Path('images') / 'photo_610.png')
return img
def get_rotation_matrix(self, yaw, pitch, roll):
"""Вычисляет матрицу поворота 3D"""
# Конвертируем в радианы
yaw = np.radians(yaw)
pitch = np.radians(pitch)
roll = np.radians(roll)
# Матрица поворота вокруг Z (yaw)
Rz = np.array([
[np.cos(yaw), -np.sin(yaw), 0],
[np.sin(yaw), np.cos(yaw), 0],
[0, 0, 1]
])
# Матрица поворота вокруг Y (pitch)
Ry = np.array([
[np.cos(pitch), 0, -np.sin(pitch)],
[0, 1, 0],
[np.sin(pitch), 0, np.cos(pitch)]
])
# Матрица поворота вокруг X (roll)
Rx = np.array([
[1, 0, 0],
[0, np.cos(roll), -np.sin(roll)],
[0, np.sin(roll), np.cos(roll)]
])
# Комбинируем повороты: R = Rz * Ry * Rx
R = Rx @ Ry @ Rz
return R
def apply_perspective_transform(self, img, yaw, pitch, roll):
"""Применяет перспективное преобразование к изображению"""
h, w = img.shape[:2]
# Параметры камеры
focal_length = w
distance = w
# Матрица камеры
camera_matrix = np.array([
[focal_length / 2, 0, w / 2],
[0, focal_length / 2, h / 2],
[0, 0, 1]
])
# Получаем матрицу поворота
R = self.get_rotation_matrix(yaw, pitch, roll)
T = np.array([
[1, 0, 0.2],
[0, 1, 0.1],
[0, 0, 2],
])
# Создаем матрицу трансформации
H = camera_matrix @ R @ T @ np.linalg.inv(camera_matrix)
# Применяем преобразование
try:
result = cv2.warpPerspective(img, H, (w, h),
borderMode=cv2.BORDER_CONSTANT,
borderValue=(50, 50, 50))
return result
except:
return img
def update_image(self):
"""Обновляет изображение с текущими параметрами"""
yaw = self.yaw_slider.value
pitch = self.pitch_slider.value
roll = self.roll_slider.value
# Применяем трансформацию
transformed = self.apply_perspective_transform(
self.original_image, yaw, pitch, roll
)
# Конвертируем в текстуру Kivy
buf = cv2.flip(transformed, 0).tobytes()
texture = Texture.create(size=(transformed.shape[1], transformed.shape[0]),
colorfmt='bgr')
texture.blit_buffer(buf, colorfmt='bgr', bufferfmt='ubyte')
self.image_widget.texture = texture
def on_slider_change(self, instance, value):
"""Обработчик изменения ползунков"""
self.yaw_label.text = f'{int(self.yaw_slider.value)}°'
self.pitch_label.text = f'{int(self.pitch_slider.value)}°'
self.roll_label.text = f'{int(self.roll_slider.value)}°'
self.update_image()
if __name__ == '__main__':
CameraTransformApp().run()

View File

@@ -1,66 +0,0 @@
import math
import numpy as np
class Geolocation:
""" Класс геопозиции """
x: float
y: float
z: float # Масштаб (Высота)
angle: float # Угол направления движения
def __init__(self, x: float = 0, y: float = 0, z: float = 1, angle: float = 0):
self.x = x
self.y = y
self.z = z
self.angle = angle
def __str__(self) -> str:
return (f"Geolocation(x={self.x:.2f}, y={self.y:.2f}, "
f"z={self.z:.2f}, angle={self.angle:.1f}°)")
def __matmul__(self, mat: np.array) -> 'Geolocation':
return self.copy().apply(mat)
def __imatmul__(self, mat: np.array) -> 'Geolocation':
return self.apply(mat)
def apply(self, mat: np.ndarray) -> 'Geolocation':
""" На основе матрицы трансформации вычисляет новую позицию и направление """
tx, ty = -mat[0, 2], -mat[1, 2]
# Вычисляем угол поворота
rotation = -np.arctan2(mat[1, 0], mat[0, 0])
scale = np.hypot(mat[0, 0], mat[0, 1])
# Координаты уже отцентрированы, поэтому используем их напрямую
dx_meters = tx
dy_meters = ty
angle_global = self.angle + rotation
# Применяем поворот к смещению (учитываем текущий угол БПЛА)
cos_angle = math.cos(angle_global)
sin_angle = math.sin(angle_global)
# Поворачиваем смещение в глобальные координаты
# Обратите внимание: dy_meters инвертирован, так как в изображениях Y направлен вниз
dx_global = dx_meters * cos_angle - dy_meters * sin_angle
dy_global = dx_meters * sin_angle + dy_meters * cos_angle
# Обновляем координаты БПЛА
self.z /= scale
self.x = self.x + dx_global * self.z
self.y = self.y + dy_global * self.z
# Нормализуем угол в диапазоне [-π, π]
self.angle = math.atan2(math.sin(angle_global), math.cos(angle_global))
return self
def copy(self) -> 'Geolocation':
return Geolocation(self.x, self.y, self.z, self.angle)
@staticmethod
def transform(g: 'Geolocation', mat: np.ndarray) -> 'Geolocation | None':
return g.copy().apply(mat)

89
google_map.py Normal file
View File

@@ -0,0 +1,89 @@
from io import BytesIO
from PIL import Image
from selenium.webdriver.common.actions.wheel_input import ScrollOrigin
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
import constants
import cv2
import math
import numpy as np
import os
import utility
def generateURL(lat: float, lon: float, zoom: int):
return f"https://www.google.com/maps/@{lon},{lat},{zoom}z"
class GoogleMap:
initial_zoom: int
initial_lat: float
initial_lon: float
pixel_ratio: float
def __init__(self, initial_lat=49.103814, initial_lon=55.794258, initial_zoom=18):
self.initial_lat = initial_lat
self.initial_lon = initial_lon
self.initial_zoom = initial_zoom
self.pixel_ratio = constants.GOOGLE_PIXEL_RATIO[self.initial_zoom]
options = webdriver.ChromeOptions()
# options.add_experimental_option("detach", True)
self.driver = webdriver.Chrome(options)
self.driver.get(generateURL(initial_lat, initial_lon, initial_zoom))
self.driver.maximize_window()
action = ActionChains(self.driver)
sleep(5)
self.driver.execute_script('document.querySelector(\'.yHc72.qk5Wte\').click()')
sleep(5)
def open(self, lat, lon, zoom):
self.initial_lat = lat
self.initial_lon = lon
self.initial_zoom = zoom
self.pixel_ratio = constants.GOOGLE_PIXEL_RATIO[self.initial_zoom]
self.driver.get(generateURL(lat, lon, zoom))
def save_photo(self, filename: str):
im = self.make_screenshot()
im.save(filename)
def destroy(self):
self.driver.close()
def get_size(self) -> tuple[int, int]:
html = self.driver.find_element(By.TAG_NAME, 'html')
return (html.size['width'], html.size['height'])
def scroll(self, x: float, y: float, count: int = 1, inner_zoom: bool = True):
html = self.driver.find_element(By.TAG_NAME, 'html')
x_offset = (x - 0.5) * (html.size['width'] - 72) + 72
y_offset = (y - 0.5) * html.size['height']
action = ActionChains(self.driver)
for i in range(count-1):
action.scroll_from_origin(ScrollOrigin(html, int(x_offset), int(y_offset)), 0, -100 if inner_zoom else 100)
action.perform()
if i != count - 1:
sleep(0.25)
def move(self, dx: float, dy: float):
self.driver.execute_script(utility.google_map_js_move_script(dx, dy))
def make_as_center(self, x: float, y: float):
dx = (x - 0.5) * self.get_size()[0]
dy = (0.5 - y) * self.get_size()[1]
self.move(dx, dy)
sleep(1)
def make_screenshot(self) -> Image.Image:
png = self.driver.get_screenshot_as_png()
im = Image.open(BytesIO(png))
return utility.cv2_to_pil(np.array(im)[:, 72:])
def get_geolocation(self):
current_url = self.driver.current_url
return utility.parse_google_maps_url(current_url)

388
main.py
View File

@@ -1,23 +1,33 @@
from google_map import GoogleMap
from pathlib import Path
from position import Position
from simulator import Simulator
from time import sleep
from trajectory_drawer import TrajectoryDrawer
from utility import cv2_to_pil
from vision_chunk import VisionChunk
from visualization import VisualizationManager
from yandex_map import YandexMap
from vision_chunk import VisionChunk
from utility import cv2_to_pil
import random
import argparse
import autopilot
import constants
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pickle
import random
import utility
import autopilot
def get_map(map_name: str = 'google', lat=49.103814, lon=55.794258, zoom=18):
if map_name == 'google': return GoogleMap(lat, lon, zoom)
if map_name == 'yandex': return YandexMap(lat, lon, zoom)
return None
def make_global_photo(filename):
yandexMap = YandexMap()
yandexMap.save_photo(filename)
yandexMap.destroy()
def make_global_photo(filename, map_name: str = 'google', lat=49.103814, lon=55.794258, zoom=13):
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, zoom)
online_map.save_photo(filename)
online_map.destroy()
def get_trajectory_points(bg_img: str) -> list[(float, float)]:
trajectoryDrawer = TrajectoryDrawer(bg_img)
@@ -26,97 +36,132 @@ def get_trajectory_points(bg_img: str) -> list[(float, float)]:
points = list(map(lambda p: [p[0] / trajectoryDrawer.img.shape[1], p[1] / trajectoryDrawer.img.shape[0]], trajectoryDrawer.points))
return points
def main():
# Скриншот местности
# make_global_photo('map.jpg')
def build(name: str, map_name: str, lat: float, lon: float):
# Получаем траекторию от пользователя
# points = get_trajectory_points('map.jpg')
# Создание папки с информацией о маршруте
dir = Path('trajectories')
if not dir.exists(): dir.mkdir()
dir /= name
assert not dir.exists()
dir.mkdir()
dir_chunks = dir / 'chunks'
dir_chunks.mkdir()
# Trajectory #1
# points = [[np.float64(0.5384504359393909), np.float64(0.4084520767967683)], [np.float64(0.4451750568707629), np.float64(0.38213330305374654)], [np.float64(0.49266070439660997), np.float64(0.2789637099811013)], [np.float64(0.36377108968359656), np.float64(0.3263375027185404)], [np.float64(0.3535955937852008), np.float64(0.4337180995900692)]]
make_global_photo('map.jpg', map_name, lat, lon, 15)
points = get_trajectory_points('map.jpg')
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, 15)
# Trajectory #2
# points = [[np.float64(0.29197731306713737), np.float64(0.3452870198135161)], [np.float64(0.33494051797147517), np.float64(0.2010601397017569)], [np.float64(0.39768940934491587), np.float64(0.25369768718780034)], [np.float64(0.4027771572941138), np.float64(0.4158213334448144)], [np.float64(0.2914120077394487), np.float64(0.5547844588079692)]]
# Trajectory #3
# points = [[np.float64(0.2755834585641664), np.float64(0.45687862048392835)], [np.float64(0.295934450360958), np.float64(0.5021469113219258)], [np.float64(0.32872215936689997), np.float64(0.4810918923275084)], [np.float64(0.3649017003389739), np.float64(0.5295184360146684)], [np.float64(0.3999506306556705), np.float64(0.49477765467387963)]]
# Trajectory #4
# points = [[np.float64(0.42143223310783934), np.float64(0.6663760594783815)], [np.float64(0.4253893704016599), np.float64(0.5537317078582484)], [np.float64(0.5124463908657128), np.float64(0.5621537154560153)], [np.float64(0.5124463908657128), np.float64(0.6684815613778233)], [np.float64(0.42143223310783934), np.float64(0.6663760594783815)]]
# Trajectory #5
# points = [[np.float64(0.5983728006743884), np.float64(0.7348048712102382)], [np.float64(0.5966768846913225), np.float64(0.5453097002604814)], [np.float64(0.6345523416464622), np.float64(0.7190136069644251)], [np.float64(0.6402053949233488), np.float64(0.5495207040593649)], [np.float64(0.5983728006743884), np.float64(0.7348048712102382)]]
# Trajectory #6
# points = [[np.float64(0.4406526142492536), np.float64(0.28106921188054296)], [np.float64(0.38581799746345413), np.float64(0.2968604761263561)], [np.float64(0.3931669667234066), np.float64(0.353709027411283)], [np.float64(0.4248240650739713), np.float64(0.35265627646156217)], [np.float64(0.40616898926024564), np.float64(0.3179154951207735)]]
# Trajectory #7
# points = [[np.float64(0.5491912371654754), np.float64(0.7505961354560512)], [np.float64(0.5537136797869846), np.float64(0.6863783275230781)], [np.float64(0.5017055896396284), np.float64(0.6653233085286606)], [np.float64(0.5520177638039186), np.float64(0.6042637534448503)], [np.float64(0.5593667330638712), np.float64(0.516885424618018)]]
# Trajectory #8
# points =
# Trajectory #9
# points =
# Trajectory #10
# points =
print(points)
# Для каждой точки сделаем приближенный снимок
yandexMap = YandexMap()
chunks: list[VisionChunk] = []
plt.ion()
for i in range(len(points)):
point = points[i]
yandexMap.scroll(point[0], point[1], 5, True)
sleep(1)
cv2_img = yandexMap.make_screenshot(point[0], point[1], 0.2, 0.2)
img = cv2_to_pil(cv2_img)
chunk = VisionChunk(img)
Path('chunks').mkdir(exist_ok=True)
chunk.save_image(Path('.') / 'chunks' / f'chunk_{i}.png')
plt.subplot(1, len(points), i+1)
plt.imshow(img)
plt.pause(0.25)
yandexMap.scroll(point[0], point[1], 5, False)
plt.tight_layout()
# Выделим на каждой картинке ключевые точки
for i in range(len(points)):
chunk = VisionChunk.load_image(Path('chunks') / f'chunk_{i}.png')
chunks.append(chunk)
kp, des = chunk.compute_keypoints()
plt.subplot(1, len(points), i+1)
plt.imshow(chunk.image)
kp_coords = np.array([j.pt for j in kp])
if len(kp_coords) > 0:
plt.scatter(kp_coords[:, 0], kp_coords[:, 1], c='red', s=20, alpha=0.7, marker='o')
plt.pause(0.2)
plt.ioff()
plt.show(block=True)
# Начнём симуляцию полёта с первой точки
yandexMap.scroll(points[0][0], points[0][1], 5, True)
sleep(0.2)
yandexMap.make_as_center(*points[0])
sleep(1)
vis_manager = VisualizationManager()
width, height = yandexMap.get_size()
# print(width, height)
width, height = online_map.get_size()
points_coords = np.array(list(map(lambda p: [
(p[0] - points[0][0]) * width, (points[0][1] - p[1]) * height
], points)))
points_coords *= 2 ** 4
pilot = autopilot.AutoPilot(points_coords, chunks, vis_manager)
simulator = Simulator(yandexMap)
points_coords *= online_map.pixel_ratio
# Начнём симуляцию полёта с первой точки
online_map.make_as_center(*points[0])
sleep(3)
online_map.scroll(0.5, 0.5, 10, True)
sleep(2)
geo = online_map.get_geolocation()
online_map.open(geo['lat'], geo['lon'], 18)
sleep(5)
points_coords_pixel = points_coords.copy() / online_map.pixel_ratio
pilot = autopilot.AutoPilot(points_coords_pixel, pixel_ratio=online_map.pixel_ratio)
simulator = Simulator(online_map)
pilot.target_idx = 0
pilot.pos = simulator.pos.copy()
command = pilot.make_command()
positions: list[Position] = []
print("points_coords_pixel:", points_coords_pixel)
for i in range(5000):
if command.stop:
break
chunk = simulator.get_chunk()
pilot.pos = simulator.pos.copy()
command = pilot.make_command()
command.velocity /= online_map.pixel_ratio
print("Position:", simulator.pos)
# Save Image
chunk.save_image(dir_chunks / f"chunk_{len(positions)}.png")
positions.append(simulator.pos.copy() * online_map.pixel_ratio)
simulator.handle(command.dangle, command.velocity)
if i == 0 and map_name == 'google':
simulator.pos.x = 0
simulator.pos.y = 0
sleep(1.5)
data = {
'points': points_coords,
'chunk_positions': positions,
'initial_geolocation': geo
}
print(points_coords)
file_positions = dir / 'positions.pkl'
with file_positions.open('wb') as file:
pickle.dump(data, file)
print("WRITE POINTS:", points)
sleep(15)
online_map.destroy()
def run(name: str, map_name: str, ref_min_distance: float):
dir = Path('trajectories')
assert dir.exists()
dir /= name
assert dir.exists(), "Укажите корректное название маршрута"
dir_chunks = dir / 'chunks'
file_positions = dir / 'positions.pkl'
with file_positions.open('rb') as file:
data = pickle.load(file)
initial_geolocation = data['initial_geolocation']
lat = initial_geolocation['lat']
lon = initial_geolocation['lon']
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, 18)
sleep(2)
chunks: list[VisionChunk] = []
for i in range(len(data['chunk_positions'])):
pos = data['chunk_positions'][i]
if len(chunks) == 0 or np.hypot(chunks[-1].pos.x - pos.x, chunks[-1].pos.y - pos.y) > ref_min_distance:
chunk = VisionChunk.load_image(dir_chunks / f"chunk_{i}.png")
chunk.pos = data['chunk_positions'][i] / online_map.pixel_ratio
chunks.append(chunk)
r = 0
for i in range(len(data['points']) - 1):
r += np.hypot(
data['points'][i][0] - data['points'][i+1][0],
data['points'][i][1] - data['points'][i+1][1]
)
print("R: ", r)
return
points = data['points'] / online_map.pixel_ratio
print("READ POINTS:", points)
vis_manager = VisualizationManager()
pilot = autopilot.AutoPilot(points, chunks, vis_manager, online_map.pixel_ratio)
simulator = Simulator(online_map)
pilot.target_idx = 0
chunk = simulator.get_chunk()
@@ -125,28 +170,26 @@ def main():
vis_manager.update_display()
vis_manager.pause(1)
vis_manager.set_target_points(points_coords)
vis_manager.set_target_points(data['points'])
proc_time = np.array([])
zoom_next_event = random.randint(5, 10)
errors = []
chunk_errors = []
chunk_improves = []
last_chunk_index = 0
sleep(1)
for i in range(10000000000):
print(f"Image #{i}")
if i == zoom_next_event:
r = random.randint(0, 1)
direction = ['up', 'down'][r]
simulator.change_zoom(direction)
zoom_next_event = i + random.randint(20, 40)
if i > 0:
simulator.set_pitch(np.sin(i / 10) * 5)
simulator.set_roll(np.sin(i / 15) * 5)
simulator.set_zoom(1.0 + np.sin(i / 10) * 0.3)
if command.stop:
break
chunk = simulator.get_chunk()
command = pilot.handle(chunk)
command.velocity /= online_map.pixel_ratio
proc_time = np.append(proc_time, command.proccessing_time)
@@ -157,36 +200,133 @@ def main():
vis_manager.pause(0.2)
vis_manager.set_target_index(pilot.target_idx)
vis_manager.update_drone_trajectory(pilot.geo.x, pilot.geo.y)
vis_manager.update_global_map(simulator.geo.x, simulator.geo.y)
vis_manager.update_error_plot(i, pilot.geo.x, pilot.geo.y, simulator.geo.x, simulator.geo.y)
vis_manager.update_drone_trajectory(pilot.pos.x * online_map.pixel_ratio, pilot.pos.y * online_map.pixel_ratio)
vis_manager.update_global_map(simulator.pos.x * online_map.pixel_ratio, simulator.pos.y * online_map.pixel_ratio)
vis_manager.update_error_plot(i, pilot.pos.x * online_map.pixel_ratio, pilot.pos.y * online_map.pixel_ratio, simulator.pos.x * online_map.pixel_ratio, simulator.pos.y * online_map.pixel_ratio)
errors.append(np.hypot(pilot.geo.x - simulator.geo.x, pilot.geo.y - simulator.geo.y))
if last_chunk_index != pilot.target_idx:
last_chunk_index = pilot.target_idx
chunk_errors.append(errors[-1])
chunk_improves.append(errors[-1] - errors[max(len(errors) - 2, 0)])
if command.stop:
break
simulator.handle(command.dangle, command.velocity)
errors.append(np.hypot((pilot.pos.x - simulator.pos.x) * online_map.pixel_ratio, (pilot.pos.y - simulator.pos.y) * online_map.pixel_ratio))
vis_manager.update_display()
vis_manager.pause(0.2)
last_proc_times = proc_time[-10:]
last_proc_times = proc_time[-30:]
print(F"\nImage #{i}")
print("Average FPS:", 1 / last_proc_times.mean())
print("Pilot coords:", pilot.pos)
print("Simulator coords:", simulator.pos)
sleep(0.5)
simulator.handle(command.dangle, command.velocity)
if i == 0 and map_name == 'google':
simulator.pos.x = 0
simulator.pos.y = 0
print("Errors:", errors)
print("MSE:", (np.array(errors) ** 2).mean())
print("RMSE:", (np.array(errors) ** 2).mean() ** 0.5)
print("Chunk errors:", chunk_errors)
print("Chunk error improves:", chunk_improves)
print("Average FPS:", 1 / proc_time.mean())
vis_manager.show_final()
if __name__ == "__main__":
main()
def parse_args():
"""Парсер аргументов командной строки"""
parser = argparse.ArgumentParser(description='Обработка траекторий')
#TODO
# Добавляем обязательный аргумент --mode
parser.add_argument(
'--mode',
type=str,
required=True,
choices=['standalone', 'build', 'run'],
help='Режим работы: standalone, build или run'
)
# Добавляем опциональный аргумент --name
parser.add_argument(
'--name',
type=str,
default=utility.generate_folder_name(),
help='Название траектории'
)
# Координаты
parser.add_argument(
'--lat',
type=float,
default=49.103814,
help='Широта (по умолчанию 49.103814)'
)
parser.add_argument(
'--lon',
type=float,
default=55.794258,
help='Долгота (по умолчанию 55.794258)'
)
# Источник эталонных изображений (ориентиров)
parser.add_argument(
'--reference',
type=str,
default='google',
choices=['google', 'yandex'],
help='Откуда берутся эталонные изображения (ориентиры): google или yandex (по умолчанию google)'
)
# Место проведения симуляции
parser.add_argument(
'--simulation',
type=str,
default='yandex',
choices=['google', 'yandex'],
help='Где проводится симуляция: google или yandex (по умолчанию yandex)'
)
# Место проведения симуляции
parser.add_argument(
'--ref-min-distance',
type=float,
default=100,
help='Минимальное расстояние между эталонами'
)
# Место проведения симуляции
parser.add_argument(
'--debug-fps',
action='store_true',
help='Включить отладку FPS'
)
# Место проведения симуляции
parser.add_argument(
'--debug-landmark',
action='store_true',
help='Включить отладку эталонов'
)
# Парсим аргументы
args = parser.parse_args()
# Проверяем, что для build и run указан --name
if args.mode in ['build', 'run'] and not args.name:
parser.error(f"--name обязателен для режима {args.mode}")
return args
if __name__ == "__main__":
args = parse_args()
name = args.name
mode = args.mode
sim: str = args.simulation
ref: str = args.reference
lat: float = args.lat
lon: float = args.lon
rmd: float = args.ref_min_distance
constants.DEBUG_FPS = args.debug_fps
constants.DEBUG_LANDMARK = args.debug_landmark
if mode == 'build' or mode == 'standalone':
build(name, ref, lat, lon)
if mode == 'run' or mode == 'standalone':
run(name, sim, rmd)

BIN
map.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.4 MiB

After

Width:  |  Height:  |  Size: 3.3 MiB

0
models/SiaN/.gitignore vendored Normal file
View File

View File

@@ -0,0 +1,295 @@
# Homography Estimation System
This system provides a complete pipeline for estimating homography matrices between Google and Yandex map images using deep learning.
## Overview
Homography estimation is crucial for aligning images from different sources (Google Maps and Yandex Maps in this case). The system includes:
1. **Dataset handling** - Loading and preprocessing image pairs
2. **Data augmentation** - Homography-based augmentation for robust training
3. **CNN model** - Deep learning model for homography estimation
4. **Training pipeline** - Complete training and evaluation workflow
5. **Inference tools** - Tools for using trained models on new data
## Installation
### Prerequisites
- Python 3.8+
- PyTorch 1.9+
- OpenCV
- PIL/Pillow
- NumPy
### Install dependencies
```bash
pip install torch torchvision opencv-python pillow numpy matplotlib tqdm tensorboard
```
## Dataset Structure
The system expects image pairs in the following format:
```
dataset/
├── 0000_google.png
├── 0000_yandex.png
├── 0001_google.png
├── 0001_yandex.png
└── ...
```
Each pair consists of:
- `{idx:04d}_google.png` - Google map image
- `{idx:04d}_yandex.png` - Yandex map image
## Quick Start
### 1. Explore the dataset
```python
from models.homography import HomographyDataset
dataset = HomographyDataset(
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
augment=True,
image_size=(256, 256)
)
print(f"Found {len(dataset)} image pairs")
sample = dataset[0]
print(f"Sample homography matrix:\n{sample['homography'].numpy()}")
```
### 2. Train a model
```bash
python models/train_homography.py \
--data_dir "C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images" \
--epochs 50 \
--batch_size 16 \
--lr 1e-3 \
--output_dir "runs/my_experiment"
```
### 3. Perform inference
```bash
python models/infer_homography.py \
--model_path "runs/my_experiment/checkpoint_best.pth" \
--mode single \
--google_path "path/to/google.png" \
--yandex_path "path/to/yandex.png" \
--output_vis "alignment_result.png"
```
## File Structure
```
models/
├── homography.py # Dataset class and data loaders
├── homography_cnn.py # CNN model architecture
├── train_homography.py # Training script
├── infer_homography.py # Inference script
├── example_homography.py # Example usage
├── homography.ipynb # Jupyter notebook (empty)
└── README_homography.md # This file
```
## Model Architecture
The homography estimation model (`HomographyCNN`) consists of:
1. **Dual encoders** - Separate feature extraction for Google and Yandex images
2. **Residual blocks** - For deep feature learning
3. **Fusion layers** - Combine features from both images
4. **Regression head** - Predict 3x3 homography matrix
### Key Features:
- Residual connections for stable training
- Batch normalization options
- Dropout for regularization
- Geometric consistency loss
## Training Configuration
Default training parameters:
- **Optimizer**: Adam with learning rate 1e-3
- **Loss function**: Combined matrix + geometric + regularization loss
- **Batch size**: 32
- **Image size**: 256x256
- **Train/val split**: 80/20
## Inference Modes
The inference script supports three modes:
### 1. Single image pair
```bash
python infer_homography.py --mode single \
--google_path google.png --yandex_path yandex.png
```
### 2. Dataset evaluation
```bash
python infer_homography.py --mode dataset \
--dataset_dir path/to/dataset --num_samples 100
```
### 3. Batch processing
```bash
python infer_homography.py --mode batch \
--input_dir path/to/input --output_dir path/to/output
```
## Evaluation Metrics
The system computes several metrics:
- **Matrix MSE**: Mean squared error of homography matrix elements
- **Corner error**: Average pixel error at image corners
- **Geometric consistency**: Warping error across grid points
## Data Augmentation
The dataset applies homography-based augmentation:
- Random rotation (-30° to 30°)
- Random scaling (0.8x to 1.2x)
- Random translation (-50 to 50 pixels)
- Small perspective distortion
## Integration with Autopilot System
The homography estimation can be integrated into the autopilot system:
```python
from models.infer_homography import HomographyInference
# Initialize inference
inference = HomographyInference(model_path="path/to/model.pth")
# During flight loop
homography = inference.predict(google_img, yandex_img)
# Use homography to update drone position
```
## Troubleshooting
### Common Issues
1. **Out of memory**
- Reduce batch size
- Use smaller image size
- Enable gradient checkpointing
2. **Poor convergence**
- Adjust learning rate
- Increase model capacity
- Add more data augmentation
3. **Inference errors**
- Check image formats (must be RGB)
- Verify model was trained with same image size
- Ensure proper normalization
### Debugging Tips
- Use `example_homography.py` to test components
- Enable TensorBoard for training visualization
- Check homography matrix normalization (last element should be ~1)
## Advanced Usage
### Custom Model Architecture
```python
from models.homography_cnn import create_homography_model
model = create_homography_model(
model_type="cnn",
input_size=(512, 512),
hidden_channels=128,
num_blocks=6,
dropout_rate=0.5
)
```
### Custom Loss Function
```python
from models.homography_cnn import HomographyLoss
loss_fn = HomographyLoss(
matrix_weight=0.7,
geometric_weight=0.3,
reg_weight=0.05,
grid_size=16
)
```
### Transfer Learning
```python
# Load pretrained model
checkpoint = torch.load("pretrained.pth")
model.load_state_dict(checkpoint["model_state_dict"])
# Fine-tune on new data
for param in model.google_encoder.parameters():
param.requires_grad = False # Freeze encoder
```
## Performance Tips
1. **Training speed**
- Use multiple GPU workers for data loading
- Enable mixed precision training
- Use gradient accumulation for larger effective batch sizes
2. **Memory efficiency**
- Use gradient checkpointing
- Implement progressive resizing
- Use memory-efficient optimizers
3. **Inference speed**
- Use TensorRT or ONNX for deployment
- Implement model quantization
- Use batch inference when possible
## Future Improvements
1. **Model enhancements**
- Transformer-based architecture
- Multi-scale feature fusion
- Uncertainty estimation
2. **Training improvements**
- Self-supervised pre-training
- Curriculum learning
- Adversarial training
3. **Deployment features**
- Real-time inference optimization
- Mobile deployment
- Web API
## Citation
If you use this system in your research, please cite:
```bibtex
@software{homography_estimation_2024,
title = {Homography Estimation for Map Alignment},
author = {Autopilot Team},
year = {2024},
url = {https://github.com/your-repo/homography-estimation}
}
```
## License
This project is licensed under the MIT License - see the LICENSE file for details.
## Support
For questions and issues:
1. Check the troubleshooting section
2. Review the example scripts
3. Open an issue on GitHub
4. Contact the development team
---
*Last updated: October 2024*

View File

@@ -0,0 +1,345 @@
"""
Example script demonstrating the complete homography estimation workflow.
This script shows how to:
1. Load the ya_go_maps dataset
2. Create and train a homography estimation model
3. Perform inference on new image pairs
4. Visualize results
"""
import os
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from models.homography import HomographyDataset, create_data_loaders
from models.homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
from models.infer_homography import HomographyInference
from models.train_homography import HomographyTrainer
def example_dataset_loading():
"""Example 1: Loading and exploring the dataset."""
print("=" * 60)
print("Example 1: Loading and exploring the dataset")
print("=" * 60)
# Path to the dataset
dataset_path = r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images"
# Create dataset
dataset = HomographyDataset(
root_dir=dataset_path,
augment=True,
image_size=(256, 256),
cache_homographies=True,
)
print(f"Dataset size: {len(dataset)} image pairs")
# Get a sample
sample = dataset[0]
print(f"\nSample keys: {list(sample.keys())}")
print(f"Google image shape: {sample['google_img'].shape}")
print(f"Yandex image shape: {sample['yandex_img'].shape}")
print(f"Homography shape: {sample['homography'].shape}")
# Show sample homography matrix
print(f"\nSample homography matrix:")
print(sample["homography"].numpy())
# Get sample without augmentation for visualization
raw_sample = dataset.get_sample_without_augmentation(0)
# Visualize sample
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
axes[0].imshow(raw_sample["google_img"])
axes[0].set_title("Google Map")
axes[0].axis("off")
axes[1].imshow(raw_sample["yandex_img"])
axes[1].set_title("Yandex Map")
axes[1].axis("off")
plt.suptitle("Sample Image Pair (without augmentation)")
plt.tight_layout()
plt.show()
return dataset
def example_model_creation():
"""Example 2: Creating and testing the model."""
print("\n" + "=" * 60)
print("Example 2: Creating and testing the model")
print("=" * 60)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Create model
model = HomographyCNN(
input_channels=3,
hidden_channels=64,
num_blocks=4,
dropout_rate=0.3,
use_batch_norm=True,
).to(device)
print(
f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters"
)
# Create dummy input
batch_size = 2
height, width = 256, 256
google_img = torch.randn(batch_size, 3, height, width).to(device)
yandex_img = torch.randn(batch_size, 3, height, width).to(device)
# Test forward pass
print("\nTesting forward pass...")
output = model(google_img, yandex_img, return_matrix=True)
print(f"Output shape: {output.shape}") # Should be (2, 3, 3)
print(f"Sample output matrix:")
print(output[0].cpu().detach().numpy())
# Test loss function
print("\nTesting loss function...")
target_homography = torch.eye(3).unsqueeze(0).expand(batch_size, -1, -1).to(device)
loss_fn = HomographyLoss(
matrix_weight=1.0,
geometric_weight=0.5,
reg_weight=0.1,
grid_size=8,
).to(device)
loss = loss_fn(output, target_homography, google_img, yandex_img)
print(f"Loss value: {loss.item():.6f}")
# Test metrics
print("\nTesting metrics...")
metrics = loss_fn.compute_metrics(output, target_homography)
for key, value in metrics.items():
print(f"{key}: {value:.6f}")
return model, loss_fn
def example_data_loaders():
"""Example 3: Creating data loaders for training."""
print("\n" + "=" * 60)
print("Example 3: Creating data loaders for training")
print("=" * 60)
# Path to the dataset
dataset_path = r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images"
# Create data loaders
train_loader, val_loader = create_data_loaders(
root_dir=dataset_path,
batch_size=16,
train_split=0.8,
num_workers=0, # Use 0 for debugging, increase for training
image_size=(256, 256),
augment_train=True,
augment_val=False,
)
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
# Get a batch from train loader
batch = next(iter(train_loader))
print(f"\nBatch keys: {list(batch.keys())}")
print(f"Batch size: {batch['google_img'].shape[0]}")
print(f"Image shape: {batch['google_img'].shape[1:]}")
return train_loader, val_loader
def example_training_config():
"""Example 4: Setting up training configuration."""
print("\n" + "=" * 60)
print("Example 4: Training configuration")
print("=" * 60)
# Training configuration
config = {
# Model config
"model_type": "cnn",
"hidden_channels": 64,
"num_blocks": 4,
"dropout_rate": 0.3,
"use_batch_norm": True,
"image_size": [256, 256],
# Training config
"epochs": 50,
"batch_size": 16,
"learning_rate": 1e-3,
"weight_decay": 1e-4,
"optimizer": "adam",
"scheduler": "plateau",
"grad_clip": 1.0,
# Loss config
"matrix_weight": 1.0,
"geometric_weight": 0.5,
"reg_weight": 0.1,
"grid_size": 8,
# Data config
"data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
"train_split": 0.8,
"num_workers": 0,
# Output config
"output_dir": "runs/example_training",
"seed": 42,
}
print("Training configuration:")
for key, value in config.items():
print(f" {key}: {value}")
return config
def example_inference():
"""Example 5: Performing inference with a trained model."""
print("\n" + "=" * 60)
print("Example 5: Inference example")
print("=" * 60)
# Note: This example assumes you have a trained model
# For demonstration, we'll show the code structure
print("Inference workflow:")
print("1. Load trained model")
print("2. Preprocess input images")
print("3. Predict homography matrix")
print("4. Visualize alignment")
# Example code structure (commented out since we don't have a trained model yet)
"""
# Create inference object
inference = HomographyInference(
model_path="runs/homography/checkpoint_best.pth",
device="cuda" if torch.cuda.is_available() else "cpu",
)
# Load images
google_img = Image.open("path/to/google.png")
yandex_img = Image.open("path/to/yandex.png")
# Predict homography
homography = inference.predict(google_img, yandex_img)
print(f"Predicted homography matrix:")
print(homography.cpu().numpy())
# Visualize alignment
inference.visualize_alignment(
google_img,
yandex_img,
homography.cpu().numpy(),
save_path="alignment_visualization.png",
show=True,
)
"""
print(
"\nNote: To run actual inference, first train a model using train_homography.py"
)
def example_quick_start():
"""Example 6: Quick start guide."""
print("\n" + "=" * 60)
print("Example 6: Quick Start Guide")
print("=" * 60)
print("\nTo get started with homography estimation:")
print("\n1. First, explore the dataset:")
print(
' python -c "from models.homography import HomographyDataset; '
"dataset = HomographyDataset(r'C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images'); "
"print(f'Found {len(dataset)} image pairs')\""
)
print("\n2. Train a model:")
print(" python models/train_homography.py \\")
print(
' --data_dir "C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images" \\'
)
print(" --epochs 50 \\")
print(" --batch_size 16 \\")
print(' --output_dir "runs/my_experiment"')
print("\n3. Perform inference on a single image pair:")
print(" python models/infer_homography.py \\")
print(' --model_path "runs/my_experiment/checkpoint_best.pth" \\')
print(" --mode single \\")
print(' --google_path "path/to/google.png" \\')
print(' --yandex_path "path/to/yandex.png" \\')
print(' --output_vis "alignment_result.png"')
print("\n4. Evaluate on the entire dataset:")
print(" python models/infer_homography.py \\")
print(' --model_path "runs/my_experiment/checkpoint_best.pth" \\')
print(" --mode dataset \\")
print(
' --dataset_dir "C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images" \\'
)
print(' --save_results "evaluation_results.json"')
def main():
"""Run all examples."""
print("Homography Estimation Workflow Examples")
print("=" * 60)
try:
# Example 1: Dataset loading
dataset = example_dataset_loading()
# Example 2: Model creation
model, loss_fn = example_model_creation()
# Example 3: Data loaders
train_loader, val_loader = example_data_loaders()
# Example 4: Training configuration
config = example_training_config()
# Example 5: Inference
example_inference()
# Example 6: Quick start
example_quick_start()
print("\n" + "=" * 60)
print("All examples completed successfully!")
print("=" * 60)
print("\nNext steps:")
print("1. Run the training script to train a model")
print("2. Use the inference script to test the trained model")
print("3. Integrate the homography estimation into your autopilot system")
except Exception as e:
print(f"\nError running examples: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()

1679
models/SiaN/homography.ipynb Normal file

File diff suppressed because it is too large Load Diff

434
models/SiaN/homography.py Normal file
View File

@@ -0,0 +1,434 @@
import os
import random
from typing import Any, Dict, List, Optional, Tuple
import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
class HomographyDataset(Dataset):
"""
Dataset for homography estimation between Yandex and Google map image pairs.
This dataset loads pairs of images (Yandex and Google maps) and provides
homography matrices for data augmentation and training.
"""
def __init__(
self,
root_dir: str,
transform=None,
augment: bool = True,
max_samples: Optional[int] = None,
image_size: Tuple[int, int] = (700, 700),
cache_homographies: bool = True,
):
"""
Initialize the HomographyDataset.
Args:
root_dir: Directory containing image pairs (format: {idx:04d}_google.png, {idx:04d}_yandex.png)
transform: Optional torchvision transforms to apply
augment: Whether to apply homography-based data augmentation
max_samples: Maximum number of samples to load (None for all)
image_size: Target size for images (height, width)
cache_homographies: Whether to cache generated homography matrices to disk
"""
self.root_dir = root_dir
self.transform = transform
self.augment = augment
self.image_size = image_size
self.cache_homographies = cache_homographies
# Find all image pairs
self.image_pairs = self._discover_image_pairs()
if max_samples is not None:
self.image_pairs = self.image_pairs[:max_samples]
print(f"Found {len(self.image_pairs)} image pairs in {root_dir}")
# Create directory for cached homographies if needed
if cache_homographies:
self.homography_cache_dir = os.path.join(root_dir, "homography_cache")
os.makedirs(self.homography_cache_dir, exist_ok=True)
def _discover_image_pairs(self) -> List[Dict[str, Any]]:
"""Discover all Google-Yandex image pairs in the dataset directory."""
image_pairs = []
# Get all Google images
google_files = [
f for f in os.listdir(self.root_dir) if f.endswith("_google.png")
]
for google_file in sorted(google_files):
# Extract index from filename
idx_str = google_file.split("_")[0]
try:
idx = int(idx_str)
except ValueError:
continue
# Check if corresponding Yandex image exists
yandex_file = f"{idx:04d}_yandex.png"
yandex_path = os.path.join(self.root_dir, yandex_file)
if os.path.exists(yandex_path):
image_pairs.append(
{
"idx": idx,
"google_path": os.path.join(self.root_dir, google_file),
"yandex_path": yandex_path,
}
)
return image_pairs
def __len__(self) -> int:
"""Return the number of image pairs in the dataset."""
return len(self.image_pairs)
def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
"""
Get a sample from the dataset.
Returns a dictionary with:
- 'google_img': Google map image tensor
- 'yandex_img': Yandex map image tensor
- 'homography': Ground truth homography matrix (3x3)
- 'idx': Sample index
"""
pair_info = self.image_pairs[idx]
# Load images
google_img = Image.open(pair_info["google_path"]).convert("RGB")
yandex_img = Image.open(pair_info["yandex_path"]).convert("RGB")
# Resize images to target size
google_img = google_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
yandex_img = yandex_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
# Get or generate homography matrix
homography_matrix = self._get_homography_matrix(pair_info["idx"])
# Apply data augmentation if enabled
if self.augment:
google_img, yandex_img, homography_matrix = self._apply_augmentation(
google_img, yandex_img, homography_matrix
)
# Convert images to tensors
if self.transform:
google_img = self.transform(google_img)
yandex_img = self.transform(yandex_img)
else:
# Default conversion to tensor
google_img = (
torch.from_numpy(np.array(google_img)).float().permute(2, 0, 1) / 255.0
)
yandex_img = (
torch.from_numpy(np.array(yandex_img)).float().permute(2, 0, 1) / 255.0
)
# Convert homography to tensor
homography_tensor = torch.from_numpy(homography_matrix).float()
return {
"google_img": google_img,
"yandex_img": yandex_img,
"homography": homography_tensor,
"idx": torch.tensor(pair_info["idx"], dtype=torch.long),
}
def _get_homography_matrix(self, idx: int) -> np.ndarray:
"""
Get homography matrix for a given index.
If cached homography exists, load it. Otherwise generate a new one.
"""
if self.cache_homographies:
cache_path = os.path.join(
self.homography_cache_dir, f"{idx:04d}_homography.npy"
)
if os.path.exists(cache_path):
return np.load(cache_path)
# Generate new homography matrix
homography_matrix = self.generate_random_homography()
# Cache if enabled
if self.cache_homographies:
np.save(cache_path, homography_matrix)
return homography_matrix
def generate_random_homography(self) -> np.ndarray:
"""
Generate a random homography matrix for data augmentation.
Returns:
np.ndarray: 3x3 homography matrix.
"""
# Generate random affine transformation parameters
angle = np.random.uniform(-30, 30) # rotation in degrees
scale = np.random.uniform(0.8, 1.2) # scaling factor
tx = np.random.uniform(-50, 50) # translation in x
ty = np.random.uniform(-50, 50) # translation in y
# Convert angle to radians
theta = np.radians(angle)
# Create affine transformation matrix
affine_matrix = np.array(
[
[scale * np.cos(theta), -scale * np.sin(theta), tx],
[scale * np.sin(theta), scale * np.cos(theta), ty],
[0, 0, 1],
]
)
# Add small perspective distortion
perspective = np.random.uniform(-0.001, 0.001, (2, 3))
perspective = np.vstack([perspective, [0, 0, 0]])
homography_matrix = affine_matrix + perspective
return homography_matrix
def _apply_augmentation(
self,
google_img: Image.Image,
yandex_img: Image.Image,
base_homography: np.ndarray,
) -> Tuple[Image.Image, Image.Image, np.ndarray]:
"""
Apply homography-based data augmentation to image pair.
Args:
google_img: Google map image
yandex_img: Yandex map image
base_homography: Base homography matrix
Returns:
Tuple of (augmented_google_img, augmented_yandex_img, augmented_homography)
"""
# Generate augmentation homography
aug_homography = self.generate_random_homography()
# Combine with base homography
combined_homography = aug_homography @ base_homography
# Apply augmentation to both images
google_aug = self._apply_homography_to_image(google_img, aug_homography)
yandex_aug = self._apply_homography_to_image(yandex_img, aug_homography)
return google_aug, yandex_aug, combined_homography
def _apply_homography_to_image(
self, img: Image.Image, homography: np.ndarray
) -> Image.Image:
"""
Apply homography transformation to a single image.
Args:
img: PIL Image to transform
homography: 3x3 homography matrix
Returns:
Transformed PIL Image
"""
# Convert to numpy array
img_np = np.array(img)
# Get image dimensions
h, w = img_np.shape[:2]
# Apply homography transformation
transformed = cv2.warpPerspective(
img_np,
homography,
(w, h),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REFLECT,
)
# Convert back to PIL Image
return Image.fromarray(transformed)
def get_sample_without_augmentation(self, idx: int) -> Dict[str, Any]:
"""
Get a sample without data augmentation.
Useful for visualization and evaluation.
"""
pair_info = self.image_pairs[idx]
# Load images
google_img = Image.open(pair_info["google_path"]).convert("RGB")
yandex_img = Image.open(pair_info["yandex_path"]).convert("RGB")
# Resize
google_img = google_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
yandex_img = yandex_img.resize(
(self.image_size[1], self.image_size[0]), Image.BILINEAR
)
# Get homography matrix
homography_matrix = self._get_homography_matrix(pair_info["idx"])
return {
"google_img": google_img,
"yandex_img": yandex_img,
"homography": homography_matrix,
"idx": pair_info["idx"],
"google_path": pair_info["google_path"],
"yandex_path": pair_info["yandex_path"],
}
def create_data_loaders(
root_dir: str,
batch_size: int = 32,
train_split: float = 0.8,
num_workers: int = 4,
image_size: Tuple[int, int] = (256, 256),
augment_train: bool = True,
augment_val: bool = False,
) -> Tuple[DataLoader, DataLoader]:
"""
Create train and validation data loaders for homography estimation.
Args:
root_dir: Directory containing image pairs
batch_size: Batch size for data loaders
train_split: Fraction of data to use for training
num_workers: Number of worker processes for data loading
image_size: Target image size (height, width)
augment_train: Whether to augment training data
augment_val: Whether to augment validation data
Returns:
Tuple of (train_loader, val_loader)
"""
from torchvision import transforms
# Define transforms
transform = transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
# Create full dataset
full_dataset = HomographyDataset(
root_dir=root_dir,
transform=transform,
augment=False, # We'll handle augmentation separately
image_size=image_size,
cache_homographies=True,
)
# Split dataset
dataset_size = len(full_dataset)
train_size = int(train_split * dataset_size)
val_size = dataset_size - train_size
# Create indices for splitting
indices = list(range(dataset_size))
random.shuffle(indices)
train_indices = indices[:train_size]
val_indices = indices[train_size:]
# Create subset samplers
from torch.utils.data import Subset
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)
# Apply augmentation by overriding __getitem__ for train dataset
if augment_train:
class AugmentedSubset(Subset):
def __getitem__(self, idx):
sample = self.dataset[self.indices[idx]]
# Apply augmentation
google_img = sample["google_img"]
yandex_img = sample["yandex_img"]
homography = sample["homography"]
# Generate augmentation homography
aug_homography = torch.from_numpy(
full_dataset.generate_random_homography()
).float()
# Combine homographies
combined_homography = aug_homography @ homography
# Apply augmentation (simplified - in practice would warp images)
# For now, we just return the combined homography
return {
"google_img": google_img,
"yandex_img": yandex_img,
"homography": combined_homography,
"idx": sample["idx"],
}
train_dataset = AugmentedSubset(full_dataset, train_indices)
# Create data loaders
train_loader = DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=num_workers,
pin_memory=True,
)
val_loader = DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
pin_memory=True,
)
return train_loader, val_loader
if __name__ == "__main__":
# Example usage
dataset = HomographyDataset(
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
augment=True,
image_size=(256, 256),
)
print(f"Dataset size: {len(dataset)}")
# Get a sample
sample = dataset[0]
print(f"Sample keys: {list(sample.keys())}")
print(f"Google image shape: {sample['google_img'].shape}")
print(f"Yandex image shape: {sample['yandex_img'].shape}")
print(f"Homography shape: {sample['homography'].shape}")
# Create data loaders
train_loader, val_loader = create_data_loaders(
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
batch_size=16,
train_split=0.8,
)
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

View File

@@ -0,0 +1,551 @@
from typing import Optional, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
class HomographyCNN(nn.Module):
"""
CNN model for homography estimation between two images.
This model takes two images (Google and Yandex maps) as input and
outputs a 3x3 homography matrix that transforms one image to align with the other.
"""
def __init__(
self,
input_channels: int = 3,
hidden_channels: int = 64,
num_blocks: int = 4,
dropout_rate: float = 0.3,
use_batch_norm: bool = True,
output_size: int = 9, # Flattened 3x3 homography matrix
):
"""
Initialize the HomographyCNN model.
Args:
input_channels: Number of input channels per image (3 for RGB)
hidden_channels: Base number of channels in the network
num_blocks: Number of convolutional blocks
dropout_rate: Dropout rate for regularization
use_batch_norm: Whether to use batch normalization
output_size: Size of output vector (9 for flattened 3x3 matrix)
"""
super().__init__()
self.input_channels = input_channels
self.hidden_channels = hidden_channels
self.num_blocks = num_blocks
self.dropout_rate = dropout_rate
self.use_batch_norm = use_batch_norm
# Feature extraction for each image separately
self.google_encoder = self._build_encoder()
self.yandex_encoder = self._build_encoder()
# Fusion layers to combine features from both images
self.fusion_layers = self._build_fusion_layers()
# Regression head for homography estimation
self.regression_head = self._build_regression_head(output_size)
# Initialize weights
self._initialize_weights()
def _build_encoder(self) -> nn.Module:
"""Build the encoder network for a single image."""
layers = []
in_channels = self.input_channels
out_channels = self.hidden_channels
# First convolutional block
layers.append(
nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=2, padding=3)
)
if self.use_batch_norm:
layers.append(nn.BatchNorm2d(out_channels))
layers.append(nn.ReLU(inplace=True))
layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# Additional convolutional blocks
for i in range(self.num_blocks):
block_in_channels = out_channels
block_out_channels = out_channels * 2 if i < 2 else out_channels
layers.append(
ResidualBlock(
in_channels=block_in_channels,
out_channels=block_out_channels,
stride=1 if i == 0 else 2,
dropout_rate=self.dropout_rate,
use_batch_norm=self.use_batch_norm,
)
)
if i < 2:
out_channels = block_out_channels
return nn.Sequential(*layers)
def _build_fusion_layers(self) -> nn.Module:
"""Build layers to fuse features from both images."""
# After encoding, each image has hidden_channels * 4 features
fused_channels = (
self.hidden_channels * 8
) # Concatenated features from both images
layers = [
# Reduce dimensionality
nn.Conv2d(
fused_channels, self.hidden_channels * 4, kernel_size=3, padding=1
),
nn.BatchNorm2d(self.hidden_channels * 4)
if self.use_batch_norm
else nn.Identity(),
nn.ReLU(inplace=True),
nn.Dropout2d(self.dropout_rate),
# Further processing
nn.Conv2d(
self.hidden_channels * 4,
self.hidden_channels * 2,
kernel_size=3,
padding=1,
),
nn.BatchNorm2d(self.hidden_channels * 2)
if self.use_batch_norm
else nn.Identity(),
nn.ReLU(inplace=True),
nn.Dropout2d(self.dropout_rate),
# Global pooling
nn.AdaptiveAvgPool2d((1, 1)),
]
return nn.Sequential(*layers)
def _build_regression_head(self, output_size: int) -> nn.Module:
"""Build the regression head for homography estimation."""
# Input size after fusion and global pooling
input_features = self.hidden_channels * 2
layers = [
nn.Flatten(),
nn.Linear(input_features, 512),
nn.BatchNorm1d(512) if self.use_batch_norm else nn.Identity(),
nn.ReLU(inplace=True),
nn.Dropout(self.dropout_rate),
nn.Linear(512, 256),
nn.BatchNorm1d(256) if self.use_batch_norm else nn.Identity(),
nn.ReLU(inplace=True),
nn.Dropout(self.dropout_rate),
nn.Linear(256, 128),
nn.BatchNorm1d(128) if self.use_batch_norm else nn.Identity(),
nn.ReLU(inplace=True),
nn.Dropout(self.dropout_rate),
nn.Linear(128, output_size),
]
return nn.Sequential(*layers)
def _initialize_weights(self):
"""Initialize model weights."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def forward(
self,
google_img: torch.Tensor,
yandex_img: torch.Tensor,
return_matrix: bool = True,
) -> torch.Tensor:
"""
Forward pass of the model.
Args:
google_img: Google map image tensor of shape (B, C, H, W)
yandex_img: Yandex map image tensor of shape (B, C, H, W)
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
Returns:
Homography matrix tensor of shape (B, 3, 3) or flattened vector of shape (B, 9)
"""
# Extract features from both images
google_features = self.google_encoder(google_img)
yandex_features = self.yandex_encoder(yandex_img)
# Concatenate features along channel dimension
combined_features = torch.cat([google_features, yandex_features], dim=1)
# Fuse features
fused_features = self.fusion_layers(combined_features)
# Regression to get homography parameters
homography_flat = self.regression_head(fused_features)
if return_matrix:
# Reshape to 3x3 matrix
batch_size = homography_flat.shape[0]
homography_matrix = homography_flat.view(batch_size, 3, 3)
# Ensure the last element is 1 (homogeneous coordinate normalization)
# Add small epsilon to prevent division by zero
epsilon = 1e-8
homography_matrix = homography_matrix / (
homography_matrix[:, 2, 2].view(-1, 1, 1) + epsilon
)
return homography_matrix
else:
return homography_flat
def predict_homography(
self,
google_img: torch.Tensor,
yandex_img: torch.Tensor,
normalize: bool = True,
) -> torch.Tensor:
"""
Predict homography matrix with optional normalization.
Args:
google_img: Google map image tensor
yandex_img: Yandex map image tensor
normalize: Whether to normalize the homography matrix
Returns:
Predicted homography matrix
"""
self.eval()
with torch.no_grad():
homography = self.forward(google_img, yandex_img, return_matrix=True)
if normalize:
# Normalize so that last element is 1
homography = homography / homography[:, 2, 2].view(-1, 1, 1)
return homography
class ResidualBlock(nn.Module):
"""Residual block with optional downsampling."""
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int = 1,
dropout_rate: float = 0.3,
use_batch_norm: bool = True,
):
super().__init__()
self.conv1 = nn.Conv2d(
in_channels,
out_channels,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
)
self.bn1 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
self.relu1 = nn.ReLU(inplace=True)
self.dropout1 = (
nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
)
self.conv2 = nn.Conv2d(
out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
)
self.bn2 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
self.relu2 = nn.ReLU(inplace=True)
self.dropout2 = (
nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
)
# Shortcut connection
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(
in_channels, out_channels, kernel_size=1, stride=stride, bias=False
),
nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity(),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
identity = self.shortcut(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu1(out)
out = self.dropout1(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu2(out)
out = self.dropout2(out)
return out
class HomographyLoss(nn.Module):
"""
Custom loss function for homography estimation.
Combines multiple loss terms:
1. Matrix element-wise L2 loss
2. Geometric consistency loss (warping error)
3. Determinant regularization (to prevent degenerate matrices)
"""
def __init__(
self,
matrix_weight: float = 1.0,
geometric_weight: float = 0.5,
reg_weight: float = 0.1,
grid_size: int = 8,
):
super().__init__()
self.matrix_weight = matrix_weight
self.geometric_weight = geometric_weight
self.reg_weight = reg_weight
self.grid_size = grid_size
# Create grid of points for geometric loss
self.register_buffer(
"grid_points",
self._create_grid_points(grid_size),
persistent=False,
)
def _create_grid_points(self, grid_size: int) -> torch.Tensor:
"""Create a grid of points for geometric consistency loss."""
x = torch.linspace(-1, 1, grid_size)
y = torch.linspace(-1, 1, grid_size)
grid_y, grid_x = torch.meshgrid(y, x, indexing="ij")
grid_points = torch.stack([grid_x.flatten(), grid_y.flatten()], dim=1)
# Add homogeneous coordinate
ones = torch.ones(grid_points.shape[0], 1)
grid_points = torch.cat([grid_points, ones], dim=1)
return grid_points.T # Shape: (3, grid_size*grid_size)
def forward(
self,
pred_homography: torch.Tensor,
target_homography: torch.Tensor,
google_img: Optional[torch.Tensor] = None,
yandex_img: Optional[torch.Tensor] = None,
) -> torch.Tensor:
"""
Compute homography loss.
Args:
pred_homography: Predicted homography matrices (B, 3, 3)
target_homography: Target homography matrices (B, 3, 3)
google_img: Google images (optional, for geometric loss)
yandex_img: Yandex images (optional, for geometric loss)
Returns:
Combined loss value
"""
batch_size = pred_homography.shape[0]
# 1. Matrix element-wise L2 loss
matrix_loss = F.mse_loss(pred_homography, target_homography)
# 2. Geometric consistency loss (if images provided)
geometric_loss = torch.tensor(0.0, device=pred_homography.device)
if google_img is not None and yandex_img is not None:
# Warp grid points with predicted homography
grid_points = self.grid_points.unsqueeze(0).expand(batch_size, -1, -1)
warped_points = torch.bmm(pred_homography, grid_points)
# Normalize homogeneous coordinates
warped_points = warped_points / (warped_points[:, 2:3, :] + 1e-8)
# Warp with target homography for comparison
target_warped_points = torch.bmm(target_homography, grid_points)
target_warped_points = target_warped_points / (
target_warped_points[:, 2:3, :] + 1e-8
)
# Compute point-wise distance
geometric_loss = F.mse_loss(
warped_points[:, :2, :], target_warped_points[:, :2, :]
)
# 3. Regularization loss (prevent degenerate matrices)
# Encourage determinant to be close to 1
pred_det = torch.det(pred_homography)
reg_loss = F.mse_loss(pred_det, torch.ones_like(pred_det))
# Combine losses
total_loss = (
self.matrix_weight * matrix_loss
+ self.geometric_weight * geometric_loss
+ self.reg_weight * reg_loss
)
return total_loss
def compute_metrics(
self,
pred_homography: torch.Tensor,
target_homography: torch.Tensor,
) -> dict:
"""
Compute evaluation metrics for homography estimation.
Args:
pred_homography: Predicted homography matrices
target_homography: Target homography matrices
Returns:
Dictionary of metrics
"""
with torch.no_grad():
# Normalize matrices
pred_norm = pred_homography / pred_homography[:, 2, 2].view(-1, 1, 1)
target_norm = target_homography / target_homography[:, 2, 2].view(-1, 1, 1)
# Matrix L2 error
matrix_error = F.mse_loss(pred_norm, target_norm, reduction="none").mean(
dim=(1, 2)
)
# Corner error (warp 4 corners of the image)
corners = torch.tensor(
[[-1, -1, 1], [1, -1, 1], [1, 1, 1], [-1, 1, 1]],
dtype=torch.float32,
device=pred_homography.device,
).T # Shape: (3, 4)
corners = corners.unsqueeze(0).expand(pred_homography.shape[0], -1, -1)
pred_corners = torch.bmm(pred_norm, corners)
pred_corners = pred_corners / (pred_corners[:, 2:3, :] + 1e-8)
target_corners = torch.bmm(target_norm, corners)
target_corners = target_corners / (target_corners[:, 2:3, :] + 1e-8)
corner_error = torch.mean(
torch.norm(pred_corners[:, :2, :] - target_corners[:, :2, :], dim=1),
dim=1,
)
# Average corner error in pixels (assuming image coordinates in [-1, 1])
# Convert to pixel error if image size is known
avg_corner_error = corner_error.mean().item()
return {
"matrix_mse": matrix_error.mean().item(),
"corner_error": avg_corner_error,
"corner_error_px": avg_corner_error * 128, # Assuming 256x256 images
}
def create_homography_model(
model_type: str = "cnn",
input_size: Tuple[int, int] = (256, 256),
**kwargs,
) -> nn.Module:
"""
Factory function to create homography estimation model.
Args:
model_type: Type of model to create ('cnn' or 'resnet')
input_size: Input image size (height, width)
**kwargs: Additional arguments passed to model constructor
Returns:
Homography estimation model
"""
if model_type == "cnn":
return HomographyCNN(**kwargs)
else:
raise ValueError(f"Unknown model type: {model_type}")
if __name__ == "__main__":
# Test the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Create model
model = HomographyCNN(
input_channels=3,
hidden_channels=64,
num_blocks=4,
dropout_rate=0.3,
use_batch_norm=True,
).to(device)
print(
f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters"
)
# Create dummy input
batch_size = 4
height, width = 256, 256
google_img = torch.randn(batch_size, 3, height, width).to(device)
yandex_img = torch.randn(batch_size, 3, height, width).to(device)
# Test forward pass
print("\nTesting forward pass...")
output = model(google_img, yandex_img, return_matrix=True)
print(f"Output shape: {output.shape}") # Should be (4, 3, 3)
print(f"Sample output:\n{output[0]}")
# Test prediction
print("\nTesting prediction...")
pred = model.predict_homography(google_img, yandex_img)
print(f"Prediction shape: {pred.shape}")
print(f"Last element (should be ~1): {pred[0, 2, 2]:.6f}")
# Test loss function
print("\nTesting loss function...")
target_homography = torch.eye(3).unsqueeze(0).expand(batch_size, -1, -1).to(device)
loss_fn = HomographyLoss(
matrix_weight=1.0,
geometric_weight=0.5,
reg_weight=0.1,
grid_size=8,
).to(device)
loss = loss_fn(output, target_homography, google_img, yandex_img)
print(f"Loss value: {loss.item():.6f}")
# Test metrics
print("\nTesting metrics...")
metrics = loss_fn.compute_metrics(output, target_homography)
for key, value in metrics.items():
print(f"{key}: {value:.6f}")
# Test model factory
print("\nTesting model factory...")
model2 = create_homography_model(
model_type="cnn",
input_size=(256, 256),
input_channels=3,
hidden_channels=32,
num_blocks=3,
).to(device)
print(
f"Model2 created with {sum(p.numel() for p in model2.parameters()):,} parameters"
)
print("\nAll tests completed successfully!")

View File

@@ -0,0 +1,553 @@
"""
Inference script for homography estimation between Google and Yandex map images.
This script loads a trained homography estimation model and performs inference
on new image pairs or the test dataset.
"""
import argparse
import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from homography import HomographyDataset
from homography_cnn import HomographyCNN, create_homography_model
from PIL import Image
from torchvision import transforms
class HomographyInference:
"""Class for performing inference with homography estimation model."""
def __init__(
self,
model_path: str,
config_path: Optional[str] = None,
device: Optional[str] = None,
):
"""
Initialize the inference class.
Args:
model_path: Path to trained model checkpoint
config_path: Path to model configuration file (optional)
device: Device to run inference on ('cuda' or 'cpu')
"""
# Set device
if device is None:
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
self.device = torch.device(device)
print(f"Using device: {self.device}")
# Load configuration
if config_path is None:
# Try to find config in the same directory as model
model_dir = Path(model_path).parent
config_path = model_dir / "config.json"
if os.path.exists(config_path):
with open(config_path, "r") as f:
self.config = json.load(f)
print(f"Loaded configuration from {config_path}")
else:
# Use default configuration
self.config = {
"image_size": [256, 256],
"hidden_channels": 64,
"num_blocks": 4,
"dropout_rate": 0.3,
"use_batch_norm": True,
}
print("Using default configuration")
# Create model
self.model = self._create_model()
self._load_model(model_path)
# Set up transforms
self.transform = self._create_transforms()
# Set model to evaluation mode
self.model.eval()
def _create_model(self) -> HomographyCNN:
"""Create model based on configuration."""
image_size = self.config.get("image_size", [256, 256])
model = create_homography_model(
model_type="cnn",
input_size=tuple(image_size),
input_channels=3,
hidden_channels=self.config.get("hidden_channels", 64),
num_blocks=self.config.get("num_blocks", 4),
dropout_rate=self.config.get("dropout_rate", 0.3),
use_batch_norm=self.config.get("use_batch_norm", True),
)
return model
def _load_model(self, model_path: str):
"""Load model weights from checkpoint."""
if not os.path.exists(model_path):
raise FileNotFoundError(f"Model file not found: {model_path}")
# Load checkpoint
checkpoint = torch.load(model_path, map_location=self.device)
if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
# Trainer checkpoint format
self.model.load_state_dict(checkpoint["model_state_dict"])
else:
# Raw model weights format
self.model.load_state_dict(checkpoint)
self.model = self.model.to(self.device)
print(f"Loaded model from {model_path}")
def _create_transforms(self):
"""Create image transforms for inference."""
return transforms.Compose(
[
transforms.Resize(tuple(self.config.get("image_size", [256, 256]))),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
)
def preprocess_images(
self, google_img: Image.Image, yandex_img: Image.Image
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Preprocess images for inference.
Args:
google_img: Google map image (PIL Image)
yandex_img: Yandex map image (PIL Image)
Returns:
Tuple of preprocessed image tensors
"""
# Convert to RGB if needed
if google_img.mode != "RGB":
google_img = google_img.convert("RGB")
if yandex_img.mode != "RGB":
yandex_img = yandex_img.convert("RGB")
# Apply transforms
google_tensor = self.transform(google_img).unsqueeze(0) # Add batch dimension
yandex_tensor = self.transform(yandex_img).unsqueeze(0)
return google_tensor, yandex_tensor
def predict(
self,
google_img: Image.Image,
yandex_img: Image.Image,
return_matrix: bool = True,
normalize: bool = True,
) -> torch.Tensor:
"""
Predict homography matrix for image pair.
Args:
google_img: Google map image (PIL Image)
yandex_img: Yandex map image (PIL Image)
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
normalize: Whether to normalize the homography matrix
Returns:
Predicted homography matrix or vector
"""
# Preprocess images
google_tensor, yandex_tensor = self.preprocess_images(google_img, yandex_img)
# Move to device
google_tensor = google_tensor.to(self.device)
yandex_tensor = yandex_tensor.to(self.device)
# Perform inference
with torch.no_grad():
homography = self.model(
google_tensor, yandex_tensor, return_matrix=return_matrix
)
if return_matrix and normalize:
# Normalize so that last element is 1
homography = homography / homography[:, 2, 2].view(-1, 1, 1)
return homography.squeeze(0) # Remove batch dimension
def predict_from_paths(
self,
google_path: str,
yandex_path: str,
return_matrix: bool = True,
normalize: bool = True,
) -> torch.Tensor:
"""
Predict homography matrix from image file paths.
Args:
google_path: Path to Google map image
yandex_path: Path to Yandex map image
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
normalize: Whether to normalize the homography matrix
Returns:
Predicted homography matrix or vector
"""
# Load images
google_img = Image.open(google_path)
yandex_img = Image.open(yandex_path)
return self.predict(google_img, yandex_img, return_matrix, normalize)
def warp_image(
self,
img: Image.Image,
homography: np.ndarray,
output_size: Optional[Tuple[int, int]] = None,
) -> Image.Image:
"""
Warp image using homography matrix.
Args:
img: Input image (PIL Image)
homography: 3x3 homography matrix (numpy array)
output_size: Output image size (width, height). If None, uses input size.
Returns:
Warped image (PIL Image)
"""
# Convert to numpy array
img_np = np.array(img)
# Get output size
if output_size is None:
output_size = (img_np.shape[1], img_np.shape[0])
# Apply homography transformation
warped_np = cv2.warpPerspective(
img_np,
homography,
output_size,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REFLECT,
)
# Convert back to PIL Image
return Image.fromarray(warped_np)
def visualize_alignment(
self,
google_img: Image.Image,
yandex_img: Image.Image,
homography: np.ndarray,
save_path: Optional[str] = None,
show: bool = True,
):
"""
Visualize alignment between images using homography.
Args:
google_img: Google map image
yandex_img: Yandex map image
homography: Homography matrix
save_path: Path to save visualization (optional)
show: Whether to display the visualization
"""
# Warp yandex image to align with google
yandex_warped = self.warp_image(yandex_img, homography)
# Convert images to numpy arrays for visualization
google_np = np.array(google_img)
yandex_np = np.array(yandex_img)
yandex_warped_np = np.array(yandex_warped)
# Create visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Original images
axes[0, 0].imshow(google_np)
axes[0, 0].set_title("Google Map (Original)")
axes[0, 0].axis("off")
axes[0, 1].imshow(yandex_np)
axes[0, 1].set_title("Yandex Map (Original)")
axes[0, 1].axis("off")
# Warped image
axes[1, 0].imshow(yandex_warped_np)
axes[1, 0].set_title("Yandex Map (Warped)")
axes[1, 0].axis("off")
# Overlay (50% transparency)
overlay = cv2.addWeighted(google_np, 0.5, yandex_warped_np, 0.5, 0)
axes[1, 1].imshow(overlay)
axes[1, 1].set_title("Overlay (Google + Warped Yandex)")
axes[1, 1].axis("off")
plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=150, bbox_inches="tight")
print(f"Visualization saved to {save_path}")
if show:
plt.show()
else:
plt.close()
def evaluate_on_dataset(
self,
dataset_dir: str,
num_samples: Optional[int] = None,
save_dir: Optional[str] = None,
) -> Dict[str, float]:
"""
Evaluate model on a dataset.
Args:
dataset_dir: Directory containing image pairs
num_samples: Number of samples to evaluate (None for all)
save_dir: Directory to save visualizations (optional)
Returns:
Dictionary of evaluation metrics
"""
# Create dataset
dataset = HomographyDataset(
root_dir=dataset_dir,
transform=None, # We'll handle transforms manually
augment=False,
image_size=tuple(self.config.get("image_size", [256, 256])),
cache_homographies=False,
)
if num_samples is not None:
indices = list(range(min(num_samples, len(dataset))))
else:
indices = list(range(len(dataset)))
errors = []
corner_errors = []
print(f"Evaluating on {len(indices)} samples...")
for idx in indices:
# Get sample without augmentation
sample = dataset.get_sample_without_augmentation(idx)
google_img = sample["google_img"]
yandex_img = sample["yandex_img"]
target_homography = sample["homography"]
# Predict homography
pred_homography = self.predict(
google_img, yandex_img, return_matrix=True, normalize=True
)
# Convert to numpy
pred_homography_np = pred_homography.cpu().numpy()
target_homography_np = target_homography
# Compute matrix error
matrix_error = np.mean((pred_homography_np - target_homography_np) ** 2)
errors.append(matrix_error)
# Compute corner error
corners = np.array(
[
[-1, -1, 1],
[1, -1, 1],
[1, 1, 1],
[-1, 1, 1],
],
dtype=np.float32,
).T
pred_corners = pred_homography_np @ corners
pred_corners = pred_corners / (pred_corners[2:3, :] + 1e-8)
target_corners = target_homography_np @ corners
target_corners = target_corners / (target_corners[2:3, :] + 1e-8)
corner_error = np.mean(
np.linalg.norm(pred_corners[:2, :] - target_corners[:2, :], axis=0)
)
corner_errors.append(corner_error)
# Save visualization if requested
if save_dir:
os.makedirs(save_dir, exist_ok=True)
vis_path = os.path.join(save_dir, f"sample_{idx:04d}.png")
self.visualize_alignment(
google_img,
yandex_img,
pred_homography_np,
save_path=vis_path,
show=False,
)
# Compute metrics
metrics = {
"mean_matrix_error": float(np.mean(errors)),
"std_matrix_error": float(np.std(errors)),
"mean_corner_error": float(np.mean(corner_errors)),
"std_corner_error": float(np.std(corner_errors)),
"median_corner_error": float(np.median(corner_errors)),
"max_corner_error": float(np.max(corner_errors)),
"min_corner_error": float(np.min(corner_errors)),
}
print("\nEvaluation Results:")
for key, value in metrics.items():
print(f" {key}: {value:.6f}")
return metrics
def main():
"""Main inference function."""
parser = argparse.ArgumentParser(description="Inference for homography estimation")
# Model arguments
parser.add_argument(
"--model_path",
type=str,
required=True,
help="Path to trained model checkpoint",
)
parser.add_argument(
"--config_path",
type=str,
help="Path to model configuration file (optional)",
)
parser.add_argument(
"--device",
type=str,
choices=["cuda", "cpu"],
help="Device to run inference on",
)
# Inference mode
parser.add_argument(
"--mode",
type=str,
default="single",
choices=["single", "dataset", "batch"],
help="Inference mode",
)
# Single image mode
parser.add_argument(
"--google_path",
type=str,
help="Path to Google map image (single mode)",
)
parser.add_argument(
"--yandex_path",
type=str,
help="Path to Yandex map image (single mode)",
)
parser.add_argument(
"--output_vis",
type=str,
help="Path to save visualization (single mode)",
)
# Dataset mode
parser.add_argument(
"--dataset_dir",
type=str,
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
help="Directory containing image pairs (dataset mode)",
)
parser.add_argument(
"--num_samples",
type=int,
help="Number of samples to evaluate (dataset mode)",
)
parser.add_argument(
"--save_vis_dir",
type=str,
help="Directory to save visualizations (dataset mode)",
)
parser.add_argument(
"--save_results",
type=str,
help="Path to save evaluation results (dataset mode)",
)
args = parser.parse_args()
# Create inference object
inference = HomographyInference(
model_path=args.model_path,
config_path=args.config_path,
device=args.device,
)
if args.mode == "single":
# Single image pair inference
if not args.google_path or not args.yandex_path:
raise ValueError(
"Both --google_path and --yandex_path are required for single mode"
)
print(f"Processing single image pair:")
print(f" Google: {args.google_path}")
print(f" Yandex: {args.yandex_path}")
# Predict homography
homography = inference.predict_from_paths(args.google_path, args.yandex_path)
print(f"\nPredicted homography matrix:")
print(homography.cpu().numpy())
# Visualize alignment
if args.output_vis:
google_img = Image.open(args.google_path)
yandex_img = Image.open(args.yandex_path)
inference.visualize_alignment(
google_img,
yandex_img,
homography.cpu().numpy(),
save_path=args.output_vis,
show=True,
)
elif args.mode == "dataset":
# Evaluate on dataset
metrics = inference.evaluate_on_dataset(
dataset_dir=args.dataset_dir,
num_samples=args.num_samples,
save_dir=args.save_vis_dir,
)
# Save results if requested
if args.save_results:
with open(args.save_results, "w") as f:
json.dump(metrics, f, indent=2)
print(f"\nResults saved to {args.save_results}")
elif args.mode == "batch":
# Batch processing (placeholder for future implementation)
print("Batch mode not yet implemented")
# Could implement processing multiple image pairs from a directory
else:
raise ValueError(f"Unknown mode: {args.mode}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,611 @@
"""
Training script for homography estimation between Google and Yandex map images.
This script trains a CNN model to estimate homography matrices that align
Google map images with Yandex map images.
"""
import argparse
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from homography import HomographyDataset, create_data_loaders
from homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
class HomographyTrainer:
"""Trainer class for homography estimation model."""
def __init__(
self,
model: nn.Module,
train_loader: DataLoader,
val_loader: DataLoader,
device: torch.device,
config: Dict,
):
"""
Initialize the trainer.
Args:
model: Homography estimation model
train_loader: Training data loader
val_loader: Validation data loader
device: Device to run training on
config: Training configuration dictionary
"""
self.model = model.to(device)
self.train_loader = train_loader
self.val_loader = val_loader
self.device = device
self.config = config
# Loss function
self.criterion = HomographyLoss(
matrix_weight=config.get("matrix_weight", 1.0),
geometric_weight=config.get("geometric_weight", 0.5),
reg_weight=config.get("reg_weight", 0.1),
grid_size=config.get("grid_size", 8),
).to(device)
# Optimizer
optimizer_name = config.get("optimizer", "adam").lower()
lr = config.get("learning_rate", 1e-3)
weight_decay = config.get("weight_decay", 1e-4)
if optimizer_name == "adam":
self.optimizer = optim.Adam(
self.model.parameters(), lr=lr, weight_decay=weight_decay
)
elif optimizer_name == "adamw":
self.optimizer = optim.AdamW(
self.model.parameters(), lr=lr, weight_decay=weight_decay
)
elif optimizer_name == "sgd":
self.optimizer = optim.SGD(
self.model.parameters(),
lr=lr,
momentum=config.get("momentum", 0.9),
weight_decay=weight_decay,
)
else:
raise ValueError(f"Unknown optimizer: {optimizer_name}")
# Learning rate scheduler
scheduler_name = config.get("scheduler", "plateau").lower()
if scheduler_name == "plateau":
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
self.optimizer,
mode="min",
factor=config.get("scheduler_factor", 0.5),
patience=config.get("scheduler_patience", 5),
verbose=True,
)
elif scheduler_name == "cosine":
self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
self.optimizer,
T_max=config.get("epochs", 100),
eta_min=config.get("min_lr", 1e-6),
)
elif scheduler_name == "step":
self.scheduler = optim.lr_scheduler.StepLR(
self.optimizer,
step_size=config.get("step_size", 30),
gamma=config.get("gamma", 0.1),
)
else:
self.scheduler = None
# Training state
self.current_epoch = 0
self.best_val_loss = float("inf")
self.train_losses: List[float] = []
self.val_losses: List[float] = []
self.val_metrics: List[Dict] = []
# Create output directory
self.output_dir = Path(config.get("output_dir", "runs/homography"))
self.output_dir.mkdir(parents=True, exist_ok=True)
# TensorBoard writer
self.writer = SummaryWriter(log_dir=self.output_dir / "tensorboard")
# Save configuration
config_path = self.output_dir / "config.json"
with open(config_path, "w") as f:
json.dump(config, f, indent=2)
print(f"Training configuration saved to {config_path}")
print(
f"Model has {sum(p.numel() for p in self.model.parameters()):,} parameters"
)
def train_epoch(self) -> float:
"""
Train for one epoch.
Returns:
Average training loss for the epoch
"""
self.model.train()
total_loss = 0.0
num_batches = len(self.train_loader)
progress_bar = tqdm(self.train_loader, desc=f"Epoch {self.current_epoch + 1}")
for batch_idx, batch in enumerate(progress_bar):
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
self.optimizer.zero_grad()
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute loss
loss = self.criterion(
pred_homography,
target_homography,
google_img,
yandex_img,
)
# Backward pass
loss.backward()
# Gradient clipping
if self.config.get("grad_clip", 1.0) > 0:
torch.nn.utils.clip_grad_norm_(
self.model.parameters(),
self.config.get("grad_clip", 1.0),
)
# Optimizer step
self.optimizer.step()
# Update statistics
total_loss += loss.item()
# Update progress bar
progress_bar.set_postfix({"loss": loss.item()})
# Log batch loss to TensorBoard
global_step = self.current_epoch * num_batches + batch_idx
self.writer.add_scalar("train/batch_loss", loss.item(), global_step)
avg_loss = total_loss / num_batches
self.train_losses.append(avg_loss)
return avg_loss
@torch.no_grad()
def validate(self) -> Tuple[float, Dict]:
"""
Validate the model.
Returns:
Tuple of (average validation loss, validation metrics)
"""
self.model.eval()
total_loss = 0.0
all_metrics = []
progress_bar = tqdm(self.val_loader, desc="Validation")
for batch in progress_bar:
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute loss
loss = self.criterion(
pred_homography,
target_homography,
google_img,
yandex_img,
)
# Compute metrics
metrics = self.criterion.compute_metrics(pred_homography, target_homography)
# Update statistics
total_loss += loss.item()
all_metrics.append(metrics)
# Update progress bar
progress_bar.set_postfix({"loss": loss.item()})
avg_loss = total_loss / len(self.val_loader)
self.val_losses.append(avg_loss)
# Aggregate metrics
avg_metrics = {}
for key in all_metrics[0].keys():
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
self.val_metrics.append(avg_metrics)
return avg_loss, avg_metrics
def save_checkpoint(self, is_best: bool = False):
"""Save model checkpoint."""
checkpoint = {
"epoch": self.current_epoch,
"model_state_dict": self.model.state_dict(),
"optimizer_state_dict": self.optimizer.state_dict(),
"train_losses": self.train_losses,
"val_losses": self.val_losses,
"val_metrics": self.val_metrics,
"best_val_loss": self.best_val_loss,
"config": self.config,
}
if self.scheduler is not None:
checkpoint["scheduler_state_dict"] = self.scheduler.state_dict()
# Save latest checkpoint
checkpoint_path = self.output_dir / "checkpoint_latest.pth"
torch.save(checkpoint, checkpoint_path)
# Save best checkpoint
if is_best:
best_path = self.output_dir / "checkpoint_best.pth"
torch.save(checkpoint, best_path)
print(f"Best model saved to {best_path}")
def load_checkpoint(self, checkpoint_path: str):
"""Load model checkpoint."""
checkpoint = torch.load(checkpoint_path, map_location=self.device)
self.model.load_state_dict(checkpoint["model_state_dict"])
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
if self.scheduler is not None and "scheduler_state_dict" in checkpoint:
self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
self.current_epoch = checkpoint["epoch"]
self.train_losses = checkpoint["train_losses"]
self.val_losses = checkpoint["val_losses"]
self.val_metrics = checkpoint["val_metrics"]
self.best_val_loss = checkpoint["best_val_loss"]
print(f"Loaded checkpoint from epoch {self.current_epoch}")
def train(self, num_epochs: int):
"""
Train the model for specified number of epochs.
Args:
num_epochs: Number of epochs to train
"""
print(f"Starting training for {num_epochs} epochs...")
start_time = time.time()
for epoch in range(num_epochs):
self.current_epoch = epoch
# Train for one epoch
train_loss = self.train_epoch()
# Validate
val_loss, val_metrics = self.validate()
# Update learning rate scheduler
if self.scheduler is not None:
if isinstance(self.scheduler, optim.lr_scheduler.ReduceLROnPlateau):
self.scheduler.step(val_loss)
else:
self.scheduler.step()
# Log to TensorBoard
self.writer.add_scalar("train/epoch_loss", train_loss, epoch)
self.writer.add_scalar("val/epoch_loss", val_loss, epoch)
for metric_name, metric_value in val_metrics.items():
self.writer.add_scalar(f"val/{metric_name}", metric_value, epoch)
# Print epoch summary
print(f"\nEpoch {epoch + 1}/{num_epochs}:")
print(f" Train Loss: {train_loss:.6f}")
print(f" Val Loss: {val_loss:.6f}")
print(" Val Metrics:")
for metric_name, metric_value in val_metrics.items():
print(f" {metric_name}: {metric_value:.6f}")
# Save checkpoint
is_best = val_loss < self.best_val_loss
if is_best:
self.best_val_loss = val_loss
self.save_checkpoint(is_best=is_best)
# Early stopping
if self.config.get("early_stopping_patience", 0) > 0:
if (
epoch - np.argmin(self.val_losses)
>= self.config["early_stopping_patience"]
):
print(f"Early stopping at epoch {epoch + 1}")
break
# Training completed
training_time = time.time() - start_time
print(f"\nTraining completed in {training_time:.2f} seconds")
print(f"Best validation loss: {self.best_val_loss:.6f}")
# Save final model
final_model_path = self.output_dir / "model_final.pth"
torch.save(self.model.state_dict(), final_model_path)
print(f"Final model saved to {final_model_path}")
# Close TensorBoard writer
self.writer.close()
def evaluate(self, test_loader: Optional[DataLoader] = None) -> Dict:
"""
Evaluate the model on test data.
Args:
test_loader: Test data loader (uses validation loader if None)
Returns:
Dictionary of evaluation metrics
"""
if test_loader is None:
test_loader = self.val_loader
self.model.eval()
all_metrics = []
print("Evaluating model...")
with torch.no_grad():
for batch in tqdm(test_loader):
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute metrics
metrics = self.criterion.compute_metrics(
pred_homography, target_homography
)
all_metrics.append(metrics)
# Aggregate metrics
avg_metrics = {}
for key in all_metrics[0].keys():
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
# Print evaluation results
print("\nEvaluation Results:")
for metric_name, metric_value in avg_metrics.items():
print(f" {metric_name}: {metric_value:.6f}")
# Save evaluation results
eval_path = self.output_dir / "evaluation_results.json"
with open(eval_path, "w") as f:
json.dump(avg_metrics, f, indent=2)
print(f"Evaluation results saved to {eval_path}")
return avg_metrics
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description="Train homography estimation model")
# Data arguments
parser.add_argument(
"--data_dir",
type=str,
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
help="Directory containing image pairs",
)
parser.add_argument(
"--batch_size", type=int, default=32, help="Batch size for training"
)
parser.add_argument(
"--image_size",
type=int,
nargs=2,
default=[256, 256],
help="Image size (height width)",
)
parser.add_argument(
"--train_split", type=float, default=0.8, help="Train/validation split ratio"
)
parser.add_argument(
"--num_workers", type=int, default=4, help="Number of data loader workers"
)
# Model arguments
parser.add_argument(
"--model_type", type=str, default="cnn", choices=["cnn"], help="Model type"
)
parser.add_argument(
"--hidden_channels", type=int, default=64, help="Number of hidden channels"
)
parser.add_argument(
"--num_blocks", type=int, default=4, help="Number of convolutional blocks"
)
parser.add_argument("--dropout_rate", type=float, default=0.3, help="Dropout rate")
parser.add_argument(
"--use_batch_norm", action="store_true", help="Use batch normalization"
)
# Training arguments
parser.add_argument("--epochs", type=int, default=100, help="Number of epochs")
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
parser.add_argument("--weight_decay", type=float, default=1e-4, help="Weight decay")
parser.add_argument(
"--optimizer", type=str, default="adam", choices=["adam", "adamw", "sgd"]
)
parser.add_argument(
"--scheduler",
type=str,
default="plateau",
choices=["plateau", "cosine", "step", "none"],
)
parser.add_argument(
"--grad_clip", type=float, default=1.0, help="Gradient clipping value"
)
# Loss arguments
parser.add_argument(
"--matrix_weight", type=float, default=1.0, help="Weight for matrix loss"
)
parser.add_argument(
"--geometric_weight",
type=float,
default=0.5,
help="Weight for geometric loss",
)
parser.add_argument(
"--reg_weight", type=float, default=0.1, help="Weight for regularization loss"
)
# Other arguments
parser.add_argument(
"--output_dir",
type=str,
default="runs/homography",
help="Output directory for checkpoints and logs",
)
parser.add_argument(
"--resume",
type=str,
help="Path to checkpoint to resume training from",
)
parser.add_argument(
"--eval_only",
action="store_true",
help="Only evaluate the model (no training)",
)
parser.add_argument(
"--seed", type=int, default=42, help="Random seed for reproducibility"
)
return parser.parse_args()
def main():
"""Main training function."""
args = parse_args()
# Set random seeds for reproducibility
torch.manual_seed(args.seed)
np.random.seed(args.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Create data loaders
print("Creating data loaders...")
train_loader, val_loader = create_data_loaders(
root_dir=args.data_dir,
batch_size=args.batch_size,
train_split=args.train_split,
num_workers=args.num_workers,
image_size=tuple(args.image_size),
augment_train=True,
augment_val=False,
)
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
# Create model
print("Creating model...")
model = create_homography_model(
model_type=args.model_type,
input_size=tuple(args.image_size),
input_channels=3,
hidden_channels=args.hidden_channels,
num_blocks=args.num_blocks,
dropout_rate=args.dropout_rate,
use_batch_norm=args.use_batch_norm,
)
# Create trainer configuration
config = {
# Model config
"model_type": args.model_type,
"hidden_channels": args.hidden_channels,
"num_blocks": args.num_blocks,
"dropout_rate": args.dropout_rate,
"use_batch_norm": args.use_batch_norm,
"image_size": args.image_size,
# Training config
"epochs": args.epochs,
"batch_size": args.batch_size,
"learning_rate": args.lr,
"weight_decay": args.weight_decay,
"optimizer": args.optimizer,
"scheduler": args.scheduler,
"grad_clip": args.grad_clip,
# Loss config
"matrix_weight": args.matrix_weight,
"geometric_weight": args.geometric_weight,
"reg_weight": args.reg_weight,
"grid_size": 8,
# Data config
"data_dir": args.data_dir,
"train_split": args.train_split,
"num_workers": args.num_workers,
# Output config
"output_dir": args.output_dir,
"seed": args.seed,
}
# Create trainer
trainer = HomographyTrainer(
model=model,
train_loader=train_loader,
val_loader=val_loader,
device=device,
config=config,
)
# Resume from checkpoint if specified
if args.resume:
print(f"Resuming from checkpoint: {args.resume}")
trainer.load_checkpoint(args.resume)
# Evaluate only mode
if args.eval_only:
trainer.evaluate()
return
# Train the model
trainer.train(num_epochs=args.epochs)
# Final evaluation
print("\nPerforming final evaluation...")
trainer.evaluate()
print("\nTraining completed successfully!")
print(f"Results saved to: {args.output_dir}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,611 @@
"""
Training script for homography estimation between Google and Yandex map images.
This script trains a CNN model to estimate homography matrices that align
Google map images with Yandex map images.
"""
import argparse
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from homography import HomographyDataset, create_data_loaders
from homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
class HomographyTrainer:
"""Trainer class for homography estimation model."""
def __init__(
self,
model: nn.Module,
train_loader: DataLoader,
val_loader: DataLoader,
device: torch.device,
config: Dict,
):
"""
Initialize the trainer.
Args:
model: Homography estimation model
train_loader: Training data loader
val_loader: Validation data loader
device: Device to run training on
config: Training configuration dictionary
"""
self.model = model.to(device)
self.train_loader = train_loader
self.val_loader = val_loader
self.device = device
self.config = config
# Loss function
self.criterion = HomographyLoss(
matrix_weight=config.get("matrix_weight", 1.0),
geometric_weight=config.get("geometric_weight", 0.5),
reg_weight=config.get("reg_weight", 0.1),
grid_size=config.get("grid_size", 8),
).to(device)
# Optimizer
optimizer_name = config.get("optimizer", "adam").lower()
lr = config.get("learning_rate", 1e-3)
weight_decay = config.get("weight_decay", 1e-4)
if optimizer_name == "adam":
self.optimizer = optim.Adam(
self.model.parameters(), lr=lr, weight_decay=weight_decay
)
elif optimizer_name == "adamw":
self.optimizer = optim.AdamW(
self.model.parameters(), lr=lr, weight_decay=weight_decay
)
elif optimizer_name == "sgd":
self.optimizer = optim.SGD(
self.model.parameters(),
lr=lr,
momentum=config.get("momentum", 0.9),
weight_decay=weight_decay,
)
else:
raise ValueError(f"Unknown optimizer: {optimizer_name}")
# Learning rate scheduler
scheduler_name = config.get("scheduler", "plateau").lower()
if scheduler_name == "plateau":
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
self.optimizer,
mode="min",
factor=config.get("scheduler_factor", 0.5),
patience=config.get("scheduler_patience", 5),
verbose=True,
)
elif scheduler_name == "cosine":
self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
self.optimizer,
T_max=config.get("epochs", 100),
eta_min=config.get("min_lr", 1e-6),
)
elif scheduler_name == "step":
self.scheduler = optim.lr_scheduler.StepLR(
self.optimizer,
step_size=config.get("step_size", 30),
gamma=config.get("gamma", 0.1),
)
else:
self.scheduler = None
# Training state
self.current_epoch = 0
self.best_val_loss = float("inf")
self.train_losses: List[float] = []
self.val_losses: List[float] = []
self.val_metrics: List[Dict] = []
# Create output directory
self.output_dir = Path(config.get("output_dir", "runs/homography"))
self.output_dir.mkdir(parents=True, exist_ok=True)
# TensorBoard writer
self.writer = SummaryWriter(log_dir=self.output_dir / "tensorboard")
# Save configuration
config_path = self.output_dir / "config.json"
with open(config_path, "w") as f:
json.dump(config, f, indent=2)
print(f"Training configuration saved to {config_path}")
print(
f"Model has {sum(p.numel() for p in self.model.parameters()):,} parameters"
)
def train_epoch(self) -> float:
"""
Train for one epoch.
Returns:
Average training loss for the epoch
"""
self.model.train()
total_loss = 0.0
num_batches = len(self.train_loader)
progress_bar = tqdm(self.train_loader, desc=f"Epoch {self.current_epoch + 1}")
for batch_idx, batch in enumerate(progress_bar):
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
self.optimizer.zero_grad()
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute loss
loss = self.criterion(
pred_homography,
target_homography,
google_img,
yandex_img,
)
# Backward pass
loss.backward()
# Gradient clipping
if self.config.get("grad_clip", 1.0) > 0:
torch.nn.utils.clip_grad_norm_(
self.model.parameters(),
self.config.get("grad_clip", 1.0),
)
# Optimizer step
self.optimizer.step()
# Update statistics
total_loss += loss.item()
# Update progress bar
progress_bar.set_postfix({"loss": loss.item()})
# Log batch loss to TensorBoard
global_step = self.current_epoch * num_batches + batch_idx
self.writer.add_scalar("train/batch_loss", loss.item(), global_step)
avg_loss = total_loss / num_batches
self.train_losses.append(avg_loss)
return avg_loss
@torch.no_grad()
def validate(self) -> Tuple[float, Dict]:
"""
Validate the model.
Returns:
Tuple of (average validation loss, validation metrics)
"""
self.model.eval()
total_loss = 0.0
all_metrics = []
progress_bar = tqdm(self.val_loader, desc="Validation")
for batch in progress_bar:
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute loss
loss = self.criterion(
pred_homography,
target_homography,
google_img,
yandex_img,
)
# Compute metrics
metrics = self.criterion.compute_metrics(pred_homography, target_homography)
# Update statistics
total_loss += loss.item()
all_metrics.append(metrics)
# Update progress bar
progress_bar.set_postfix({"loss": loss.item()})
avg_loss = total_loss / len(self.val_loader)
self.val_losses.append(avg_loss)
# Aggregate metrics
avg_metrics = {}
for key in all_metrics[0].keys():
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
self.val_metrics.append(avg_metrics)
return avg_loss, avg_metrics
def save_checkpoint(self, is_best: bool = False):
"""Save model checkpoint."""
checkpoint = {
"epoch": self.current_epoch,
"model_state_dict": self.model.state_dict(),
"optimizer_state_dict": self.optimizer.state_dict(),
"train_losses": self.train_losses,
"val_losses": self.val_losses,
"val_metrics": self.val_metrics,
"best_val_loss": self.best_val_loss,
"config": self.config,
}
if self.scheduler is not None:
checkpoint["scheduler_state_dict"] = self.scheduler.state_dict()
# Save latest checkpoint
checkpoint_path = self.output_dir / "checkpoint_latest.pth"
torch.save(checkpoint, checkpoint_path)
# Save best checkpoint
if is_best:
best_path = self.output_dir / "checkpoint_best.pth"
torch.save(checkpoint, best_path)
print(f"Best model saved to {best_path}")
def load_checkpoint(self, checkpoint_path: str):
"""Load model checkpoint."""
checkpoint = torch.load(checkpoint_path, map_location=self.device)
self.model.load_state_dict(checkpoint["model_state_dict"])
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
if self.scheduler is not None and "scheduler_state_dict" in checkpoint:
self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
self.current_epoch = checkpoint["epoch"]
self.train_losses = checkpoint["train_losses"]
self.val_losses = checkpoint["val_losses"]
self.val_metrics = checkpoint["val_metrics"]
self.best_val_loss = checkpoint["best_val_loss"]
print(f"Loaded checkpoint from epoch {self.current_epoch}")
def train(self, num_epochs: int):
"""
Train the model for specified number of epochs.
Args:
num_epochs: Number of epochs to train
"""
print(f"Starting training for {num_epochs} epochs...")
start_time = time.time()
for epoch in range(num_epochs):
self.current_epoch = epoch
# Train for one epoch
train_loss = self.train_epoch()
# Validate
val_loss, val_metrics = self.validate()
# Update learning rate scheduler
if self.scheduler is not None:
if isinstance(self.scheduler, optim.lr_scheduler.ReduceLROnPlateau):
self.scheduler.step(val_loss)
else:
self.scheduler.step()
# Log to TensorBoard
self.writer.add_scalar("train/epoch_loss", train_loss, epoch)
self.writer.add_scalar("val/epoch_loss", val_loss, epoch)
for metric_name, metric_value in val_metrics.items():
self.writer.add_scalar(f"val/{metric_name}", metric_value, epoch)
# Print epoch summary
print(f"\nEpoch {epoch + 1}/{num_epochs}:")
print(f" Train Loss: {train_loss:.6f}")
print(f" Val Loss: {val_loss:.6f}")
print(" Val Metrics:")
for metric_name, metric_value in val_metrics.items():
print(f" {metric_name}: {metric_value:.6f}")
# Save checkpoint
is_best = val_loss < self.best_val_loss
if is_best:
self.best_val_loss = val_loss
self.save_checkpoint(is_best=is_best)
# Early stopping
if self.config.get("early_stopping_patience", 0) > 0:
if (
epoch - np.argmin(self.val_losses)
>= self.config["early_stopping_patience"]
):
print(f"Early stopping at epoch {epoch + 1}")
break
# Training completed
training_time = time.time() - start_time
print(f"\nTraining completed in {training_time:.2f} seconds")
print(f"Best validation loss: {self.best_val_loss:.6f}")
# Save final model
final_model_path = self.output_dir / "model_final.pth"
torch.save(self.model.state_dict(), final_model_path)
print(f"Final model saved to {final_model_path}")
# Close TensorBoard writer
self.writer.close()
def evaluate(self, test_loader: Optional[DataLoader] = None) -> Dict:
"""
Evaluate the model on test data.
Args:
test_loader: Test data loader (uses validation loader if None)
Returns:
Dictionary of evaluation metrics
"""
if test_loader is None:
test_loader = self.val_loader
self.model.eval()
all_metrics = []
print("Evaluating model...")
with torch.no_grad():
for batch in tqdm(test_loader):
# Move data to device
google_img = batch["google_img"].to(self.device)
yandex_img = batch["yandex_img"].to(self.device)
target_homography = batch["homography"].to(self.device)
# Forward pass
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
# Compute metrics
metrics = self.criterion.compute_metrics(
pred_homography, target_homography
)
all_metrics.append(metrics)
# Aggregate metrics
avg_metrics = {}
for key in all_metrics[0].keys():
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
# Print evaluation results
print("\nEvaluation Results:")
for metric_name, metric_value in avg_metrics.items():
print(f" {metric_name}: {metric_value:.6f}")
# Save evaluation results
eval_path = self.output_dir / "evaluation_results.json"
with open(eval_path, "w") as f:
json.dump(avg_metrics, f, indent=2)
print(f"Evaluation results saved to {eval_path}")
return avg_metrics
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description="Train homography estimation model")
# Data arguments
parser.add_argument(
"--data_dir",
type=str,
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
help="Directory containing image pairs",
)
parser.add_argument(
"--batch_size", type=int, default=32, help="Batch size for training"
)
parser.add_argument(
"--image_size",
type=int,
nargs=2,
default=[256, 256],
help="Image size (height width)",
)
parser.add_argument(
"--train_split", type=float, default=0.8, help="Train/validation split ratio"
)
parser.add_argument(
"--num_workers", type=int, default=4, help="Number of data loader workers"
)
# Model arguments
parser.add_argument(
"--model_type", type=str, default="cnn", choices=["cnn"], help="Model type"
)
parser.add_argument(
"--hidden_channels", type=int, default=64, help="Number of hidden channels"
)
parser.add_argument(
"--num_blocks", type=int, default=4, help="Number of convolutional blocks"
)
parser.add_argument("--dropout_rate", type=float, default=0.3, help="Dropout rate")
parser.add_argument(
"--use_batch_norm", action="store_true", help="Use batch normalization"
)
# Training arguments
parser.add_argument("--epochs", type=int, default=100, help="Number of epochs")
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
parser.add_argument("--weight_decay", type=float, default=1e-4, help="Weight decay")
parser.add_argument(
"--optimizer", type=str, default="adam", choices=["adam", "adamw", "sgd"]
)
parser.add_argument(
"--scheduler",
type=str,
default="plateau",
choices=["plateau", "cosine", "step", "none"],
)
parser.add_argument(
"--grad_clip", type=float, default=1.0, help="Gradient clipping value"
)
# Loss arguments
parser.add_argument(
"--matrix_weight", type=float, default=1.0, help="Weight for matrix loss"
)
parser.add_argument(
"--geometric_weight",
type=float,
default=0.5,
help="Weight for geometric loss",
)
parser.add_argument(
"--reg_weight", type=float, default=0.1, help="Weight for regularization loss"
)
# Other arguments
parser.add_argument(
"--output_dir",
type=str,
default="runs/homography",
help="Output directory for checkpoints and logs",
)
parser.add_argument(
"--resume",
type=str,
help="Path to checkpoint to resume training from",
)
parser.add_argument(
"--eval_only",
action="store_true",
help="Only evaluate the model (no training)",
)
parser.add_argument(
"--seed", type=int, default=42, help="Random seed for reproducibility"
)
return parser.parse_args()
def main():
"""Main training function."""
args = parse_args()
# Set random seeds for reproducibility
torch.manual_seed(args.seed)
np.random.seed(args.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Create data loaders
print("Creating data loaders...")
train_loader, val_loader = create_data_loaders(
root_dir=args.data_dir,
batch_size=args.batch_size,
train_split=args.train_split,
num_workers=args.num_workers,
image_size=tuple(args.image_size),
augment_train=True,
augment_val=False,
)
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
# Create model
print("Creating model...")
model = create_homography_model(
model_type=args.model_type,
input_size=tuple(args.image_size),
input_channels=3,
hidden_channels=args.hidden_channels,
num_blocks=args.num_blocks,
dropout_rate=args.dropout_rate,
use_batch_norm=args.use_batch_norm,
)
# Create trainer configuration
config = {
# Model config
"model_type": args.model_type,
"hidden_channels": args.hidden_channels,
"num_blocks": args.num_blocks,
"dropout_rate": args.dropout_rate,
"use_batch_norm": args.use_batch_norm,
"image_size": args.image_size,
# Training config
"epochs": args.epochs,
"batch_size": args.batch_size,
"learning_rate": args.lr,
"weight_decay": args.weight_decay,
"optimizer": args.optimizer,
"scheduler": args.scheduler,
"grad_clip": args.grad_clip,
# Loss config
"matrix_weight": args.matrix_weight,
"geometric_weight": args.geometric_weight,
"reg_weight": args.reg_weight,
"grid_size": 8,
# Data config
"data_dir": args.data_dir,
"train_split": args.train_split,
"num_workers": args.num_workers,
# Output config
"output_dir": args.output_dir,
"seed": args.seed,
}
# Create trainer
trainer = HomographyTrainer(
model=model,
train_loader=train_loader,
val_loader=val_loader,
device=device,
config=config,
)
# Resume from checkpoint if specified
if args.resume:
print(f"Resuming from checkpoint: {args.resume}")
trainer.load_checkpoint(args.resume)
# Evaluate only mode
if args.eval_only:
trainer.evaluate()
return
# Train the model
trainer.train(num_epochs=args.epochs)
# Final evaluation
print("\nPerforming final evaluation...")
trainer.evaluate()
print("\nTraining completed successfully!")
print(f"Results saved to: {args.output_dir}")
if __name__ == "__main__":
main()

View File

@@ -1,418 +0,0 @@
"""
Расширенная система автопилота с улучшенным отслеживанием координат и угла БПЛА
"""
import random
import cv2
import numpy as np
from PIL import Image
import math
from typing import Optional, Tuple, Dict, List
from dataclasses import dataclass
import time
random.seed(1)
@dataclass
class DroneState:
"""Структура для хранения состояния БПЛА"""
x: float = 0.0
y: float = 0.0
altitude: float = 10.0
angle: float = 0.0
velocity_x: float = 0.0
velocity_y: float = 0.0
angular_velocity: float = 0.0
timestamp: float = 0.0
confidence: float = 1.0
class AdvancedAutoPilot:
"""
Расширенный автопилот с улучшенным отслеживанием координат и угла БПЛА
"""
def __init__(self,
initial_x: float = 0.0,
initial_y: float = 0.0,
initial_altitude: float = 10.0,
camera_fov: float = 60.0,
image_width: int = 640,
image_height: int = 480):
# Состояние БПЛА
self.state = DroneState(
x=initial_x,
y=initial_y,
altitude=initial_altitude,
timestamp=time.time()
)
# Параметры камеры
self.camera_fov = math.radians(camera_fov)
self.image_width = image_width
self.image_height = image_height
# Вычисляем коэффициент перевода пикселей в метры
self.update_pixel_to_meter_ratio()
# История состояний для фильтрации
self.state_history: List[DroneState] = []
self.max_history_size = 10
# Параметры фильтрации
self.velocity_filter_alpha = 0.3 # Коэффициент сглаживания скорости
self.position_filter_alpha = 0.7 # Коэффициент сглаживания позиции
# Инициализация детекторов
self.orb_detector = cv2.ORB_create(
nfeatures=2000,
scaleFactor=1.2,
nlevels=8,
edgeThreshold=31,
firstLevel=0,
WTA_K=2,
patchSize=31,
fastThreshold=20
)
self.bf_matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# Предыдущее изображение
self.prev_image: Optional[np.ndarray] = None
self.prev_timestamp: float = 0.0
# Счетчики
self.frame_count = 0
self.successful_tracking_frames = 0
# Статистика
self.tracking_stats = {
'total_frames': 0,
'successful_frames': 0,
'failed_frames': 0,
'avg_processing_time': 0.0
}
def update_pixel_to_meter_ratio(self):
"""Обновляет коэффициент перевода пикселей в метры на основе высоты и FOV камеры"""
# Вычисляем размер пикселя на земле при текущей высоте
ground_distance = 2 * self.state.altitude * math.tan(self.camera_fov / 2)
self.pixel_to_meter_ratio = ground_distance / self.image_width
def detect_and_match_keypoints(self, img1: np.ndarray, img2: np.ndarray) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[List], Optional[List], Optional[List]]:
"""Обнаруживает и сопоставляет ключевые точки между двумя изображениями"""
# Обнаружение ключевых точек и дескрипторов
kp1, des1 = self.orb_detector.detectAndCompute(img1, None)
kp2, des2 = self.orb_detector.detectAndCompute(img2, None)
if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
return None, None, None, None, None
# Сопоставление ключевых точек
matches = self.bf_matcher.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)
# Фильтрация хороших совпадений
good_matches = []
for match in matches:
if match.distance < 50: # Порог расстояния
good_matches.append(match)
if len(good_matches) < 8: # Увеличиваем минимальное количество совпадений
return None, None, None, None, None
# Извлечение координат сопоставленных точек
src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
return src_pts, dst_pts, good_matches, kp1, kp2
def estimate_transformation_matrix(self, src_pts: np.ndarray, dst_pts: np.ndarray) -> Optional[Dict]:
"""Оценивает матрицу трансформации с улучшенной обработкой ошибок"""
try:
# Используем RANSAC для оценки матрицы гомографии
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
if H is None:
return None
# Проверяем качество матрицы
if not self.is_valid_homography(H):
return None
# Извлекаем параметры трансформации
a11, a12 = H[0, 0], H[0, 1]
a21, a22 = H[1, 0], H[1, 1]
tx, ty = H[0, 2], H[1, 2]
# Вычисляем угол поворота
angle = np.arctan2(a21, a11)
# Вычисляем масштаб
scale_x = np.sqrt(a11**2 + a21**2)
scale_y = np.sqrt(a12**2 + a22**2)
scale = (scale_x + scale_y) / 2
# Проверяем разумность масштаба
if scale < 0.5 or scale > 2.0:
return None
return {
'translation': (tx, ty),
'rotation': angle,
'scale': scale,
'homography': H,
'mask': mask,
'inliers_ratio': np.sum(mask) / len(mask) if mask is not None else 0.0
}
except Exception as e:
print(f"Ошибка при оценке трансформации: {e}")
return None
def is_valid_homography(self, H: np.ndarray) -> bool:
"""Проверяет валидность матрицы гомографии"""
if H is None:
return False
# Проверяем, что матрица не вырождена
det = np.linalg.det(H)
if abs(det) < 1e-6:
return False
# Проверяем, что элементы матрицы разумны
if np.any(np.abs(H) > 1000):
return False
return True
def update_drone_position(self, transformation_info: Dict, dt: float):
"""Обновляет позицию и состояние БПЛА с учетом временного интервала"""
tx, ty = transformation_info['translation']
rotation = transformation_info['rotation']
scale = transformation_info['scale']
inliers_ratio = transformation_info.get('inliers_ratio', 1.0)
# Конвертируем смещение в пикселях в метры
dx_meters = tx * self.pixel_to_meter_ratio
dy_meters = ty * self.pixel_to_meter_ratio
# Применяем поворот к смещению (учитываем текущий угол БПЛА)
cos_angle = math.cos(self.state.angle)
sin_angle = math.sin(self.state.angle)
# Поворачиваем смещение в глобальные координаты
dx_global = dx_meters * cos_angle - dy_meters * sin_angle
dy_global = dx_meters * sin_angle + dy_meters * cos_angle
# Вычисляем скорости
velocity_x = dx_global / dt if dt > 0 else 0.0
velocity_y = dy_global / dt if dt > 0 else 0.0
angular_velocity = rotation / dt if dt > 0 else 0.0
# Применяем фильтрацию
self.state.velocity_x = (self.velocity_filter_alpha * velocity_x +
(1 - self.velocity_filter_alpha) * self.state.velocity_x)
self.state.velocity_y = (self.velocity_filter_alpha * velocity_y +
(1 - self.velocity_filter_alpha) * self.state.velocity_y)
self.state.angular_velocity = (self.velocity_filter_alpha * angular_velocity +
(1 - self.velocity_filter_alpha) * self.state.angular_velocity)
# Обновляем позицию с фильтрацией
new_x = self.state.x + dx_global
new_y = self.state.y + dy_global
new_angle = self.state.angle + rotation
self.state.x = (self.position_filter_alpha * new_x +
(1 - self.position_filter_alpha) * self.state.x)
self.state.y = (self.position_filter_alpha * new_y +
(1 - self.position_filter_alpha) * self.state.y)
self.state.angle = (self.position_filter_alpha * new_angle +
(1 - self.position_filter_alpha) * self.state.angle)
# Нормализуем угол
self.state.angle = math.atan2(math.sin(self.state.angle), math.cos(self.state.angle))
# Обновляем временную метку и уверенность
self.state.timestamp = time.time()
self.state.confidence = min(1.0, inliers_ratio * 1.5) # Увеличиваем уверенность при хороших совпадениях
def handle(self, image: Image.Image) -> bool:
"""Обрабатывает новый кадр и обновляет состояние БПЛА"""
start_time = time.time()
self.frame_count += 1
self.tracking_stats['total_frames'] += 1
# Конвертируем изображение
current_image = np.array(image)
current_timestamp = time.time()
if self.prev_image is None:
self.prev_image = current_image
self.prev_timestamp = current_timestamp
return False
# Вычисляем временной интервал
dt = current_timestamp - self.prev_timestamp
# Обнаруживаем и сопоставляем ключевые точки
src_pts, dst_pts, matches, kp1, kp2 = self.detect_and_match_keypoints(self.prev_image, current_image)
if src_pts is not None and dst_pts is not None:
# Оцениваем матрицу трансформации
transformation_info = self.estimate_transformation_matrix(src_pts, dst_pts)
if transformation_info:
# Обновляем позицию БПЛА
self.update_drone_position(transformation_info, dt)
# Добавляем состояние в историю
self.add_state_to_history()
# Обновляем статистику
self.successful_tracking_frames += 1
self.tracking_stats['successful_frames'] += 1
# Выводим информацию
self.print_tracking_info(transformation_info)
# Визуализация
self.visualize_tracking(current_image, kp1, kp2, matches, transformation_info)
success = True
else:
self.tracking_stats['failed_frames'] += 1
success = False
else:
self.tracking_stats['failed_frames'] += 1
success = False
# Обновляем предыдущее изображение
self.prev_image = current_image
self.prev_timestamp = current_timestamp
# Обновляем статистику времени обработки
processing_time = time.time() - start_time
self.tracking_stats['avg_processing_time'] = (
(self.tracking_stats['avg_processing_time'] * (self.frame_count - 1) + processing_time) / self.frame_count
)
return success
def add_state_to_history(self):
"""Добавляет текущее состояние в историю"""
# Создаем копию текущего состояния
state_copy = DroneState(
x=self.state.x,
y=self.state.y,
altitude=self.state.altitude,
angle=self.state.angle,
velocity_x=self.state.velocity_x,
velocity_y=self.state.velocity_y,
angular_velocity=self.state.angular_velocity,
timestamp=self.state.timestamp,
confidence=self.state.confidence
)
self.state_history.append(state_copy)
# Ограничиваем размер истории
if len(self.state_history) > self.max_history_size:
self.state_history.pop(0)
def get_drone_state(self) -> Dict:
"""Возвращает текущее состояние БПЛА в виде словаря"""
return {
'x': self.state.x,
'y': self.state.y,
'altitude': self.state.altitude,
'angle': self.state.angle,
'angle_degrees': math.degrees(self.state.angle),
'velocity_x': self.state.velocity_x,
'velocity_y': self.state.velocity_y,
'angular_velocity': self.state.angular_velocity,
'confidence': self.state.confidence,
'frame_count': self.frame_count,
'timestamp': self.state.timestamp
}
def get_tracking_stats(self) -> Dict:
"""Возвращает статистику отслеживания"""
success_rate = (self.tracking_stats['successful_frames'] /
max(1, self.tracking_stats['total_frames'])) * 100
return {
**self.tracking_stats,
'success_rate_percent': success_rate,
'current_altitude': self.state.altitude,
'pixel_to_meter_ratio': self.pixel_to_meter_ratio
}
def set_altitude(self, altitude: float):
"""Устанавливает высоту БПЛА и пересчитывает масштаб"""
self.state.altitude = altitude
self.update_pixel_to_meter_ratio()
def reset_position(self, x: float = 0.0, y: float = 0.0, angle: float = 0.0):
"""Сбрасывает позицию и угол БПЛА"""
self.state.x = x
self.state.y = y
self.state.angle = angle
self.state.velocity_x = 0.0
self.state.velocity_y = 0.0
self.state.angular_velocity = 0.0
self.state_history.clear()
self.frame_count = 0
self.successful_tracking_frames = 0
def print_tracking_info(self, transformation_info: Dict):
"""Выводит информацию об отслеживании"""
state = self.get_drone_state()
stats = self.get_tracking_stats()
print(f"Frame {self.frame_count}:")
print(f" Position: ({state['x']:.3f}, {state['y']:.3f}) m")
print(f" Angle: {state['angle_degrees']:.1f}°")
print(f" Velocity: ({state['velocity_x']:.2f}, {state['velocity_y']:.2f}) m/s")
print(f" Confidence: {state['confidence']:.2f}")
print(f" Success Rate: {stats['success_rate_percent']:.1f}%")
def visualize_tracking(self, current_image: np.ndarray, kp1, kp2, matches, transformation_info: Dict):
"""Визуализирует процесс отслеживания"""
if kp1 is None or kp2 is None or matches is None:
return
# Рисуем сопоставления
img_matches = cv2.drawMatches(self.prev_image, kp1, current_image, kp2, matches, None,
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
# Добавляем информацию о состоянии
state = self.get_drone_state()
stats = self.get_tracking_stats()
info_lines = [
f"Position: ({state['x']:.2f}, {state['y']:.2f}) m",
f"Angle: {state['angle_degrees']:.1f}°",
f"Velocity: ({state['velocity_x']:.2f}, {state['velocity_y']:.2f}) m/s",
f"Altitude: {state['altitude']:.1f} m",
f"Confidence: {state['confidence']:.2f}",
f"Success Rate: {stats['success_rate_percent']:.1f}%"
]
for i, line in enumerate(info_lines):
cv2.putText(img_matches, line, (10, 30 + i * 25),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
cv2.imshow('Advanced Drone Tracking', img_matches)
cv2.waitKey(1)
def act(self) -> float:
"""Возвращает угол поворота для управления дроном"""
return self.state.angle
def get_trajectory_history(self) -> Tuple[List[float], List[float], List[float]]:
"""Возвращает историю траектории"""
x_history = [state.x for state in self.state_history]
y_history = [state.y for state in self.state_history]
angle_history = [math.degrees(state.angle) for state in self.state_history]
return x_history, y_history, angle_history

Binary file not shown.

View File

@@ -1,222 +0,0 @@
"""
Примеры использования системы отслеживания БПЛА
"""
import numpy as np
from PIL import Image
import cv2
from autopilot import AutoPilot
import matplotlib.pyplot as plt
def example_basic_usage():
"""Базовый пример использования автопилота"""
print("=== Базовый пример использования ===")
# Создаем автопилот с начальными параметрами
autopilot = AutoPilot(
initial_x=0.0, # Начальная X координата
initial_y=0.0, # Начальная Y координата
initial_altitude=15.0 # Начальная высота
)
# Создаем тестовые изображения
img1 = create_simple_test_image()
img2 = create_simple_test_image(offset_x=20, offset_y=10)
# Обрабатываем изображения
autopilot.handle(img1)
autopilot.handle(img2)
# Получаем состояние
state = autopilot.get_drone_state()
print(f"Позиция БПЛА: ({state['x']:.2f}, {state['y']:.2f})")
print(f"Угол БПЛА: {state['angle_degrees']:.1f}°")
print(f"Высота: {state['altitude']:.1f}м")
print(f"Обработано кадров: {state['frame_count']}")
def example_altitude_change():
"""Пример изменения высоты и пересчета масштаба"""
print("\n=== Пример изменения высоты ===")
autopilot = AutoPilot(initial_altitude=10.0)
# Создаем изображения
img1 = create_simple_test_image()
img2 = create_simple_test_image(offset_x=10, offset_y=5)
# Обрабатываем при высоте 10м
autopilot.handle(img1)
autopilot.handle(img2)
state1 = autopilot.get_drone_state()
print(f"При высоте 10м: ({state1['x']:.2f}, {state1['y']:.2f})")
# Изменяем высоту на 20м
autopilot.set_altitude(20.0)
# Обрабатываем те же изображения при новой высоте
autopilot.reset_position()
autopilot.handle(img1)
autopilot.handle(img2)
state2 = autopilot.get_drone_state()
print(f"При высоте 20м: ({state2['x']:.2f}, {state2['y']:.2f})")
# Коэффициент должен измениться в 2 раза
ratio = state2['x'] / state1['x'] if state1['x'] != 0 else 0
print(f"Коэффициент изменения: {ratio:.2f} (ожидается ~2.0)")
def example_circular_movement():
"""Пример отслеживания кругового движения"""
print("\n=== Пример кругового движения ===")
autopilot = AutoPilot()
# Создаем базовое изображение
base_img = create_simple_test_image()
# Параметры кругового движения
radius = 30
center_x, center_y = 320, 240
num_steps = 8
x_history = []
y_history = []
angle_history = []
# Симулируем круговое движение
for i in range(num_steps):
angle = 2 * np.pi * i / num_steps
# Вычисляем смещение
dx = radius * np.cos(angle)
dy = radius * np.sin(angle)
# Создаем смещенное изображение
current_img = create_simple_test_image(
offset_x=int(dx),
offset_y=int(dy),
rotation=angle * 0.1
)
# Обрабатываем
autopilot.handle(current_img)
# Сохраняем историю
state = autopilot.get_drone_state()
x_history.append(state['x'])
y_history.append(state['y'])
angle_history.append(state['angle_degrees'])
print(f"Шаг {i+1}: ({state['x']:.2f}, {state['y']:.2f}), угол: {state['angle_degrees']:.1f}°")
# Визуализируем траекторию
plot_circular_trajectory(x_history, y_history, angle_history)
def example_position_reset():
"""Пример сброса позиции"""
print("\n=== Пример сброса позиции ===")
autopilot = AutoPilot()
# Создаем и обрабатываем изображения
img1 = create_simple_test_image()
img2 = create_simple_test_image(offset_x=15, offset_y=10)
autopilot.handle(img1)
autopilot.handle(img2)
state_before = autopilot.get_drone_state()
print(f"До сброса: ({state_before['x']:.2f}, {state_before['y']:.2f})")
# Сбрасываем позицию
autopilot.reset_position(x=100.0, y=200.0, angle=np.pi/4)
state_after = autopilot.get_drone_state()
print(f"После сброса: ({state_after['x']:.2f}, {state_after['y']:.2f}), угол: {state_after['angle_degrees']:.1f}°")
def create_simple_test_image(offset_x=0, offset_y=0, rotation=0):
"""Создает простое тестовое изображение с заданным смещением"""
width, height = 640, 480
# Создаем базовое изображение с геометрическими фигурами
img = np.zeros((height, width, 3), dtype=np.uint8)
# Добавляем фигуры
cv2.rectangle(img, (100, 100), (200, 200), (255, 0, 0), -1)
cv2.circle(img, (400, 300), 50, (0, 255, 0), -1)
cv2.line(img, (50, 400), (550, 400), (0, 0, 255), 5)
cv2.rectangle(img, (300, 150), (350, 200), (255, 255, 0), -1)
# Применяем трансформацию
if offset_x != 0 or offset_y != 0 or rotation != 0:
cos_rot = np.cos(rotation)
sin_rot = np.sin(rotation)
M = np.float32([
[cos_rot, -sin_rot, offset_x],
[sin_rot, cos_rot, offset_y]
])
img = cv2.warpAffine(img, M, (width, height),
borderMode=cv2.BORDER_REPLICATE)
return Image.fromarray(img)
def plot_circular_trajectory(x_history, y_history, angle_history):
"""Визуализирует круговую траекторию"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# График траектории
ax1.plot(x_history, y_history, 'b-o', linewidth=2, markersize=8)
ax1.plot(x_history[0], y_history[0], 'go', markersize=12, label='Начало')
ax1.plot(x_history[-1], y_history[-1], 'ro', markersize=12, label='Конец')
ax1.set_xlabel('X координата (м)')
ax1.set_ylabel('Y координата (м)')
ax1.set_title('Круговая траектория БПЛА')
ax1.grid(True)
ax1.legend()
ax1.axis('equal')
# График угла
ax2.plot(range(len(angle_history)), angle_history, 'r-o', linewidth=2, markersize=8)
ax2.set_xlabel('Номер шага')
ax2.set_ylabel('Угол поворота (градусы)')
ax2.set_title('Изменение угла поворота')
ax2.grid(True)
plt.tight_layout()
plt.show()
def example_error_handling():
"""Пример обработки ошибок и граничных случаев"""
print("\n=== Пример обработки ошибок ===")
autopilot = AutoPilot()
# Создаем изображение без текстуры (однородное)
blank_img = Image.fromarray(np.zeros((480, 640, 3), dtype=np.uint8))
# Обрабатываем - должно работать без ошибок
try:
autopilot.handle(blank_img)
autopilot.handle(blank_img)
print("Обработка однородного изображения прошла успешно")
except Exception as e:
print(f"Ошибка при обработке: {e}")
# Проверяем состояние
state = autopilot.get_drone_state()
print(f"Состояние после обработки: ({state['x']:.2f}, {state['y']:.2f})")
if __name__ == "__main__":
print("Примеры использования системы отслеживания БПЛА\n")
# Запускаем все примеры
example_basic_usage()
example_altitude_change()
example_circular_movement()
example_position_reset()
example_error_handling()
print("\nВсе примеры выполнены успешно!")

View File

@@ -1,13 +0,0 @@
from autopilot import AutoPilot, RandomPilot
from simulator import Simulator
from visualization import VisualizationManager
# Создаем менеджер визуализации
viz_manager = VisualizationManager("Drone Autopilot - Global Map & Detection")
# Создаем симулятор с AutoPilot для обработки изображений
# Передаем менеджер визуализации в автопилот
simulator = Simulator(RandomPilot(), AutoPilot(viz_manager=viz_manager), viz_manager=viz_manager)
# Запускаем симуляцию
simulator.loop()

View File

@@ -1,56 +0,0 @@
asttokens==3.0.0
attrs==25.3.0
certifi==2025.1.31
cffi==1.17.1
colorama==0.4.6
comm==0.2.2
contourpy==1.3.1
cycler==0.12.1
debugpy==1.8.14
decorator==5.2.1
executing==2.2.0
fonttools==4.57.0
h11==0.14.0
idna==3.10
ipykernel==6.29.5
ipython==9.3.0
ipython_pygments_lexers==1.1.1
jedi==0.19.2
jupyter_client==8.6.3
jupyter_core==5.8.1
kiwisolver==1.4.8
matplotlib==3.10.1
matplotlib-inline==0.1.7
nest-asyncio==1.6.0
numpy==2.2.4
opencv-python==4.11.0.86
outcome==1.3.0.post0
packaging==24.2
parso==0.8.4
pillow==11.2.1
platformdirs==4.3.8
prompt_toolkit==3.0.51
psutil==7.0.0
pure_eval==0.2.3
pycparser==2.22
Pygments==2.19.2
pyparsing==3.2.3
PySocks==1.7.1
python-dateutil==2.9.0.post0
pywin32==310
pyzmq==27.0.0
scipy==1.15.2
selenium==4.31.0
six==1.17.0
sniffio==1.3.1
sortedcontainers==2.4.0
stack-data==0.6.3
tornado==6.5.1
traitlets==5.14.3
trio==0.29.0
trio-websocket==0.12.2
typing_extensions==4.13.2
urllib3==2.4.0
wcwidth==0.2.13
websocket-client==1.8.0
wsproto==1.2.0

View File

@@ -1,173 +0,0 @@
import math
from io import BytesIO
from time import sleep
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from autopilot import Pilot
from visualization import VisualizationManager, SimMode
import os
class Simulator:
driver: webdriver.Chrome
mode: SimMode
operatorPilot: Pilot
autonomePilot: Pilot
angle: float
# Менеджер визуализации
viz_manager: VisualizationManager
current_x: float
current_y: float
def __init__(self, operatorPilot: Pilot, autonomePilot: Pilot, viz_manager: VisualizationManager = None):
self.mode = SimMode.OPERATOR
self.operatorPilot = operatorPilot
self.autonomePilot = autonomePilot
# Инициализация переменных для траектории
self.current_x = 0.0
self.current_y = 0.0
# Создаем менеджер визуализации
self.viz_manager = viz_manager
# Передаем менеджер визуализации в автопилот, если он поддерживает это
if hasattr(self.autonomePilot, 'viz_manager'):
self.autonomePilot.viz_manager = self.viz_manager
# Создаем папку для изображений, если её нет
os.makedirs('./images', exist_ok=True)
options = webdriver.ChromeOptions()
# options.add_experimental_option("detach", True)
self.driver = webdriver.Chrome(options)
self.driver.get("https://yandex.ru/maps/43/kazan/?ll=49.103814%2C55.794258&z=14")
sleep(2)
action = ActionChains(self.driver)
# Закрытие левой панели
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
action.move_to_element_with_offset(self.driver.find_element(By.XPATH, "//div[@class='rounded-controls']/div[@class='rounded-controls__child'][5]//button"), 5, 5)
action.perform()
# Режим спутника
sleep(1)
action.move_to_element_with_offset(self.driver.find_element(By.XPATH, "//div[@class='rounded-controls']/div[@class='rounded-controls__child'][5]//button"), -500, -500)
action.click()
action.perform()
self.angle = 0
def rotate_image_like_drone(self, image: Image.Image, angle: float) -> Image.Image:
"""
Поворачивает картинку как будто съемка ведется с летящего дрона.
Выделяет концентрический квадрат, поворачивает его и извлекает результат.
"""
# Получаем размеры изображения
width, height = image.size
square_size = min(width, height)
cropped_image = image.crop((0, 0, square_size, square_size))
cropped_image = cropped_image.rotate(angle / math.pi * 180, expand=True)
# Определяем размер концентрического квадрата (80% от минимальной стороны)
local_square_size = int(square_size / 2 ** 0.5)
# Вычисляем координаты для центрирования квадрата
left = (cropped_image.width - local_square_size) // 2
top = (cropped_image.height - local_square_size) // 2
right = left + local_square_size
bottom = top + local_square_size
# Вырезаем концентрический квадрат
final_image = cropped_image.crop((left, top, right, bottom))
return final_image
def update_trajectory(self, dx: float, dy: float):
"""
Обновляет траекторию полета беспилотника
"""
# Обновляем текущие координаты
self.current_x += dx
self.current_y += dy
def update_map(self):
"""
Обновляет карту траектории полета
"""
self.viz_manager.update_global_map(self.current_x, self.current_y, self.mode)
def loop(self):
html = self.driver.find_element(By.TAG_NAME, 'html')
action = ActionChains(self.driver)
# Добавляем начальную точку в траекторию
self.update_trajectory(0, 0)
self.viz_manager.update_global_map(self.current_x, self.current_y, self.mode)
for i in range(1000):
signal = None
if self.mode == SimMode.OPERATOR:
signal = self.operatorPilot.act()
if signal is None:
self.mode = SimMode.AUTONOME
print("Режим возвращения домой!")
if self.mode == SimMode.AUTONOME:
signal = self.autonomePilot.act()
if signal is None:
break
dangle, velocity = signal
drone_x, drone_y = self.autonomePilot.get_position()
self.viz_manager.update_error_plot(i, drone_x, drone_y, self.current_x, self.current_y)
# Сдвиг камеры
action = ActionChains(self.driver)
action.move_to_element_with_offset(html, 200, 200)
action.click_and_hold()
self.angle += dangle
dx = math.cos(self.angle) * velocity
dy = math.sin(self.angle) * velocity
action.move_by_offset(-dx, dy)
action.release()
action.perform()
# Обновляем траекторию
self.update_trajectory(dx, dy)
# Загрузка скриншота
png = self.driver.get_screenshot_as_png()
im = Image.open(BytesIO(png))
im = im.crop([0, 80, im.width-80, im.height-60])
# Применяем поворот как будто съемка с дрона
rotated_im = self.rotate_image_like_drone(im, math.pi / 2 - self.angle)
# Передаем изображение в AutoPilot для анализа
self.autonomePilot.handle(rotated_im)
# Обновляем визуализацию каждые несколько итераций для производительности
self.update_map()
self.viz_manager.update_display()
# Финальное обновление карты
self.update_map()
self.viz_manager.update_display()
print("last position: ", self.driver.current_url)
print(f"Финальная позиция: ({self.current_x:.2f}, {self.current_y:.2f})")
# Показываем карту до закрытия, но не поднимаем на передний план
self.viz_manager.show_final()
print("Симуляция завершена. Окно визуализации остается открытым для анализа.")

View File

@@ -1,55 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100.00078395707533 -0.0007839570753276348\n"
]
}
],
"source": [
"h = 100\n",
"R = 6378 * 1000\n",
"\n",
"def solve_x2(a, b, c):\n",
" d = b ** 2 - 4 * a * c\n",
" x1 = (-b - d ** 0.5) / (2 * a)\n",
" x2 = (-b + d ** 0.5) / (2 * a)\n",
" return x1, x2\n",
"\n",
"x1, x2 = solve_x2(2, -2 * (h + R), 2 * h * R + h ** 2)\n",
"\n",
"y = h - x1\n",
"\n",
"print(x1, y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,96 +0,0 @@
#!/usr/bin/env python3
"""
Тестовый скрипт для проверки работы с координатами относительно центра изображения
"""
import numpy as np
from PIL import Image
import cv2
from autopilot import AutoPilot
def test_center_coordinates():
"""Тестирует работу с координатами относительно центра изображения"""
print("Тест координат относительно центра изображения...")
# Создаем тестовое изображение
width, height = 640, 480
test_image = np.zeros((height, width, 3), dtype=np.uint8)
# Добавляем некоторые объекты для отслеживания
cv2.circle(test_image, (width//2, height//2), 50, (255, 255, 255), -1) # Центр
cv2.circle(test_image, (100, 100), 20, (255, 0, 0), -1) # Левый верхний угол
cv2.circle(test_image, (width-100, height-100), 20, (0, 255, 0), -1) # Правый нижний угол
# Создаем второе изображение с небольшим смещением
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1) # Смещенный центр
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1) # Смещенный левый верхний угол
cv2.circle(test_image2, (width-90, height-95), 20, (0, 255, 0), -1) # Смещенный правый нижний угол
# Конвертируем в PIL Image
pil_image1 = Image.fromarray(test_image)
pil_image2 = Image.fromarray(test_image2)
# Создаем автопилот
pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
print(f"Размер изображения: {width}x{height}")
print(f"Ожидаемый центр: ({width//2}, {height//2})")
# Обрабатываем первое изображение
pilot.handle(pil_image1)
print(f"Центр изображения после первого кадра: {pilot.image_center}")
# Обрабатываем второе изображение
pilot.handle(pil_image2)
print(f"Центр изображения после второго кадра: {pilot.image_center}")
# Получаем состояние дрона
drone_state = pilot.get_drone_state()
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
print("\nАнализ отцентрированных координат:")
print("Ожидаемое смещение центра: (10, 5) пикселей")
print("Ожидаемое смещение левого верхнего угла: (10, 5) пикселей")
print("Ожидаемое смещение правого нижнего угла: (10, 5) пикселей")
print("Тест завершен!")
def test_keypoint_centering():
"""Тестирует отцентрирование ключевых точек"""
print("\nТест отцентрирования ключевых точек...")
# Создаем простое изображение с одной точкой
width, height = 100, 100
test_image = np.zeros((height, width, 3), dtype=np.uint8)
cv2.circle(test_image, (25, 25), 5, (255, 255, 255), -1) # Точка в (25, 25)
# Создаем второе изображение с той же точкой
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
cv2.circle(test_image2, (25, 25), 5, (255, 255, 255), -1)
# Конвертируем в PIL Image
pil_image1 = Image.fromarray(test_image)
pil_image2 = Image.fromarray(test_image2)
# Создаем автопилот
pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
print(f"Размер изображения: {width}x{height}")
print(f"Центр изображения: ({width//2}, {height//2}) = (50, 50)")
print(f"Позиция точки: (25, 25)")
print(f"Ожидаемые отцентрированные координаты: (25-50, 25-50) = (-25, -25)")
# Обрабатываем изображения
pilot.handle(pil_image1)
pilot.handle(pil_image2)
drone_state = pilot.get_drone_state()
print(f"Результат: позиция дрона ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
print("Тест завершен!")
if __name__ == "__main__":
test_center_coordinates()
test_keypoint_centering()

View File

@@ -1,204 +0,0 @@
#!/usr/bin/env python3
"""
Тестовый скрипт для проверки системы отслеживания траектории беспилотника
"""
import numpy as np
from PIL import Image
import cv2
from autopilot import AutoPilot, RandomPilot
from simulator import Simulator, SimMode
import matplotlib.pyplot as plt
import time
def create_test_image(width=640, height=480, pattern_type="random"):
"""Создает тестовое изображение с различными паттернами"""
if pattern_type == "random":
# Создаем изображение с случайными точками
img = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
# Добавляем несколько четких объектов
cv2.rectangle(img, (100, 100), (200, 200), (255, 0, 0), -1)
cv2.circle(img, (400, 300), 50, (0, 255, 0), -1)
cv2.line(img, (50, 400), (550, 400), (0, 0, 255), 5)
elif pattern_type == "grid":
# Создаем сетку
img = np.zeros((height, width, 3), dtype=np.uint8)
for i in range(0, width, 50):
cv2.line(img, (i, 0), (i, height), (255, 255, 255), 1)
for i in range(0, height, 50):
cv2.line(img, (0, i), (width, i), (255, 255, 255), 1)
elif pattern_type == "circles":
# Создаем изображение с кругами
img = np.zeros((height, width, 3), dtype=np.uint8)
for i in range(5):
x = np.random.randint(50, width-50)
y = np.random.randint(50, height-50)
radius = np.random.randint(20, 80)
color = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
cv2.circle(img, (x, y), radius, color, -1)
return Image.fromarray(img)
def simulate_drone_movement(base_image, dx, dy, rotation=0):
"""Симулирует движение БПЛА, создавая смещенное изображение"""
img_array = np.array(base_image)
height, width = img_array.shape[:2]
# Создаем матрицу трансформации
cos_rot = np.cos(rotation)
sin_rot = np.sin(rotation)
# Матрица поворота и смещения
M = np.float32([
[cos_rot, -sin_rot, dx],
[sin_rot, cos_rot, dy]
])
# Применяем трансформацию
transformed = cv2.warpAffine(img_array, M, (width, height),
borderMode=cv2.BORDER_REPLICATE)
return Image.fromarray(transformed)
def test_drone_tracking():
"""Тестирует систему отслеживания БПЛА"""
print("Запуск тестирования системы отслеживания БПЛА...")
# Создаем автопилот
autopilot = AutoPilot(initial_x=0.0, initial_y=0.0, initial_altitude=10.0)
# Создаем базовое изображение
base_image = create_test_image(pattern_type="circles")
# Траектория движения (dx, dy, rotation)
trajectory = [
(10, 0, 0), # Движение вперед
(10, 5, 0.1), # Движение вперед с небольшим поворотом
(0, 10, 0), # Движение вправо
(-5, 5, -0.1), # Движение назад-вправо с поворотом
(5, -5, 0.05), # Движение вперед-влево
(0, -10, 0), # Движение влево
(-10, 0, 0), # Движение назад
]
# Массивы для хранения истории позиций
x_history = []
y_history = []
angle_history = []
# Обрабатываем каждое изображение в траектории
for i, (dx, dy, rotation) in enumerate(trajectory):
print(f"\n--- Кадр {i+1} ---")
# Создаем смещенное изображение
current_image = simulate_drone_movement(base_image, dx, dy, rotation)
# Обрабатываем изображение
autopilot.handle(current_image)
# Получаем состояние БПЛА
state = autopilot.get_drone_state()
x_history.append(state['x'])
y_history.append(state['y'])
angle_history.append(state['angle_degrees'])
# Обновляем базовое изображение для следующего кадра
base_image = current_image
# Небольшая пауза для визуализации
time.sleep(0.5)
# Закрываем окно OpenCV
cv2.destroyAllWindows()
# Визуализируем траекторию
plot_trajectory(x_history, y_history, angle_history)
def plot_trajectory(x_history, y_history, angle_history):
"""Визуализирует траекторию движения БПЛА"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# График траектории
ax1.plot(x_history, y_history, 'b-o', linewidth=2, markersize=8)
ax1.plot(x_history[0], y_history[0], 'go', markersize=12, label='Начало')
ax1.plot(x_history[-1], y_history[-1], 'ro', markersize=12, label='Конец')
ax1.set_xlabel('X координата (м)')
ax1.set_ylabel('Y координата (м)')
ax1.set_title('Траектория движения БПЛА')
ax1.grid(True)
ax1.legend()
ax1.axis('equal')
# График угла поворота
ax2.plot(range(len(angle_history)), angle_history, 'r-o', linewidth=2, markersize=8)
ax2.set_xlabel('Номер кадра')
ax2.set_ylabel('Угол поворота (градусы)')
ax2.set_title('Изменение угла поворота БПЛА')
ax2.grid(True)
plt.tight_layout()
plt.show()
def test_single_frame():
"""Тестирует обработку одного кадра"""
print("Тестирование обработки одного кадра...")
autopilot = AutoPilot()
# Создаем два похожих изображения
img1 = create_test_image(pattern_type="circles")
img2 = simulate_drone_movement(img1, 15, 10, 0.05)
# Обрабатываем первое изображение
autopilot.handle(img1)
# Обрабатываем второе изображение
autopilot.handle(img2)
# Получаем состояние
state = autopilot.get_drone_state()
print(f"Позиция БПЛА: ({state['x']:.2f}, {state['y']:.2f})")
print(f"Угол БПЛА: {state['angle_degrees']:.1f}°")
cv2.destroyAllWindows()
def test_trajectory_tracking():
"""Тестирует систему отслеживания траектории"""
print("Запуск теста системы отслеживания траектории...")
# Создаем пилотов
operator_pilot = RandomPilot(velocity=1.0)
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
# Создаем симулятор
simulator = Simulator(operator_pilot, autonome_pilot)
try:
# Запускаем симуляцию
simulator.loop()
print("Симуляция завершена!")
print(f"Всего точек траектории: {len(simulator.trajectory_x)}")
print(f"Режим оператора: {sum(1 for mode in simulator.trajectory_modes if mode == SimMode.OPERATOR)} точек")
print(f"Автономный режим: {sum(1 for mode in simulator.trajectory_modes if mode == SimMode.AUTONOME)} точек")
except Exception as e:
print(f"Ошибка во время симуляции: {e}")
finally:
# Закрываем браузер
simulator.driver.quit()
if __name__ == "__main__":
print("Выберите тест:")
print("1. Тест одного кадра")
print("2. Тест полной траектории")
choice = input("Введите номер теста (1 или 2): ").strip()
if choice == "1":
test_single_frame()
elif choice == "2":
test_trajectory_tracking()
else:
print("Неверный выбор. Запускаю тест одного кадра...")
test_single_frame()

View File

@@ -1,65 +0,0 @@
#!/usr/bin/env python3
"""
Тестовый скрипт для проверки единой системы визуализации
"""
import numpy as np
from PIL import Image
import cv2
from autopilot import AutoPilot, RandomPilot
from simulator import Simulator
from visualization import VisualizationManager, SimMode
def test_unified_visualization():
"""Тестирует единую систему визуализации"""
print("Тест единой системы визуализации...")
# Создаем менеджер визуализации
viz_manager = VisualizationManager("Test - Unified Visualization")
# Создаем автопилот с менеджером визуализации
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0, viz_manager=viz_manager)
# Создаем симулятор
simulator = Simulator(RandomPilot(), autonome_pilot)
# Создаем тестовые изображения
width, height = 640, 480
test_image1 = np.zeros((height, width, 3), dtype=np.uint8)
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
# Добавляем объекты для отслеживания
cv2.circle(test_image1, (width//2, height//2), 50, (255, 255, 255), -1)
cv2.circle(test_image1, (100, 100), 20, (255, 0, 0), -1)
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1)
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1)
# Конвертируем в PIL Image
pil_image1 = Image.fromarray(test_image1)
pil_image2 = Image.fromarray(test_image2)
print("Обрабатываем первое изображение...")
autonome_pilot.handle(pil_image1)
print("Обрабатываем второе изображение...")
autonome_pilot.handle(pil_image2)
# Обновляем визуализацию
viz_manager.update_display()
# Получаем состояние дрона
drone_state = autonome_pilot.get_drone_state()
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
print("Тест завершен! Окно визуализации должно показывать:")
print("- Глобальную карту с траекторией")
print("- Детекцию ключевых точек")
print("- Сопоставление точек между кадрами")
# Показываем финальное состояние
viz_manager.show_final()
if __name__ == "__main__":
test_unified_visualization()

View File

@@ -1,67 +0,0 @@
#!/usr/bin/env python3
"""
Упрощенный тест для проверки системы визуализации без браузера
"""
import numpy as np
from PIL import Image
import cv2
from autopilot import AutoPilot
from visualization import VisualizationManager, SimMode
def test_visualization_only():
"""Тестирует только систему визуализации без симулятора"""
print("Тест системы визуализации...")
# Создаем менеджер визуализации
viz_manager = VisualizationManager("Test - Visualization Only")
# Создаем автопилот с менеджером визуализации
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0, viz_manager=viz_manager)
# Создаем тестовые изображения
width, height = 640, 480
test_image1 = np.zeros((height, width, 3), dtype=np.uint8)
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
# Добавляем объекты для отслеживания
cv2.circle(test_image1, (width//2, height//2), 50, (255, 255, 255), -1)
cv2.circle(test_image1, (100, 100), 20, (255, 0, 0), -1)
cv2.circle(test_image1, (width-100, height-100), 20, (0, 255, 0), -1)
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1)
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1)
cv2.circle(test_image2, (width-90, height-95), 20, (0, 255, 0), -1)
# Конвертируем в PIL Image
pil_image1 = Image.fromarray(test_image1)
pil_image2 = Image.fromarray(test_image2)
print("Обрабатываем первое изображение...")
autonome_pilot.handle(pil_image1)
print("Обрабатываем второе изображение...")
autonome_pilot.handle(pil_image2)
# Симулируем движение дрона
print("Симулируем движение дрона...")
for i in range(10):
# Обновляем глобальную карту
viz_manager.update_global_map(i * 10, i * 5, SimMode.OPERATOR if i < 5 else SimMode.AUTONOME)
viz_manager.update_display()
# Получаем состояние дрона
drone_state = autonome_pilot.get_drone_state()
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
print("Тест завершен! Окно визуализации должно показывать:")
print("- Глобальную карту с траекторией (синий - оператор, красный - автономный)")
print("- Детекцию ключевых точек на текущем кадре")
print("- Сопоставление точек между кадрами")
# Показываем финальное состояние
viz_manager.show_final()
if __name__ == "__main__":
test_visualization_only()

View File

@@ -1,277 +0,0 @@
#!/usr/bin/env python3
"""
Модуль для управления общим окном визуализации
"""
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from enum import Enum
import cv2
from PIL import Image
import matplotlib
# Настройки matplotlib
matplotlib.use('TkAgg')
plt.rcParams['figure.raise_window'] = False
class SimMode(Enum):
OPERATOR = 1
AUTONOME = 2
class VisualizationManager:
"""
Менеджер для управления общим окном визуализации
"""
def __init__(self, window_title="Drone Autopilot Visualization"):
self.window_title = window_title
self.fig = None
self.ax_error_plot = None # График погрешности позиции
self.ax_global_map = None
self.ax_detection = None
self.ax_matches = None
# Данные для глобальной карты
self.trajectory_x = []
self.trajectory_y = []
self.trajectory_modes = []
self.current_x = 0.0
self.current_y = 0.0
# Данные для траектории БПЛА (его собственное видение)
self.drone_trajectory_x = []
self.drone_trajectory_y = []
# Данные для графика погрешности
self.error_times = []
self.position_errors = []
# Данные для детекции
self.current_frame = None
self.keypoints = []
self.matches = []
self._setup_window()
def _setup_window(self):
"""Настраивает общее окно с несколькими областями"""
plt.ion()
self.fig = plt.figure(figsize=(16, 10))
self.fig.canvas.manager.window.title(self.window_title)
# Открываем окно на полный экран
self.fig.canvas.manager.window.state('zoomed')
# Создаем сетку 2x2 с разными размерами колонок
gs = self.fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3, width_ratios=[1, 1])
# График погрешности позиции (левый верхний угол)
self.ax_error_plot = self.fig.add_subplot(gs[0, 0])
self.ax_error_plot.set_title('Погрешность позиции от времени')
self.ax_error_plot.set_xlabel('Время (кадры)')
self.ax_error_plot.set_ylabel('Погрешность (метры)')
self.ax_error_plot.grid(True, alpha=0.3)
# Глобальная карта (левый нижний угол)
self.ax_global_map = self.fig.add_subplot(gs[1, 0])
self.ax_global_map.set_title('Global Map - Траектория полета беспилотника')
self.ax_global_map.set_xlabel('X координата')
self.ax_global_map.set_ylabel('Y координата')
self.ax_global_map.grid(True, alpha=0.3)
self.ax_global_map.axhline(y=0, color='k', linestyle='-', alpha=0.3)
self.ax_global_map.axvline(x=0, color='k', linestyle='-', alpha=0.3)
# Детекция ключевых точек (правый верхний угол)
self.ax_detection = self.fig.add_subplot(gs[0, 1])
self.ax_detection.set_title('Keypoint Detection')
self.ax_detection.axis('off')
# Сопоставление точек (правый нижний угол)
self.ax_matches = self.fig.add_subplot(gs[1, 1])
self.ax_matches.set_title('Feature Matching')
self.ax_matches.axis('off')
# Настройки окна
self.fig.canvas.manager.window.attributes('-topmost', False)
plt.tight_layout()
def update_global_map(self, x: float, y: float, mode: SimMode):
"""Обновляет глобальную карту"""
self.current_x = x
self.current_y = y
self.trajectory_x.append(x)
self.trajectory_y.append(y)
self.trajectory_modes.append(mode)
self.ax_global_map.clear()
self.ax_global_map.set_title('Global Map - Траектория полета беспилотника')
self.ax_global_map.set_xlabel('X координата')
self.ax_global_map.set_ylabel('Y координата')
self.ax_global_map.grid(True, alpha=0.3)
self.ax_global_map.axhline(y=0, color='k', linestyle='-', alpha=0.3)
self.ax_global_map.axvline(x=0, color='k', linestyle='-', alpha=0.3)
if len(self.trajectory_x) > 1:
# Разделяем траекторию по режимам
operator_indices = [i for i, m in enumerate(self.trajectory_modes) if m == SimMode.OPERATOR]
autonome_indices = operator_indices[-1:] + [i for i, m in enumerate(self.trajectory_modes) if m == SimMode.AUTONOME]
# Рисуем траекторию оператора (синий цвет)
if len(operator_indices) > 1:
operator_x = [self.trajectory_x[i] for i in operator_indices]
operator_y = [self.trajectory_y[i] for i in operator_indices]
self.ax_global_map.plot(operator_x, operator_y, 'b-', linewidth=2, label='Режим оператора')
# Рисуем траекторию автономного режима (красный цвет)
if len(autonome_indices) > 1:
autonome_x = [self.trajectory_x[i] for i in autonome_indices]
autonome_y = [self.trajectory_y[i] for i in autonome_indices]
self.ax_global_map.plot(autonome_x, autonome_y, 'r-', linewidth=2, label='Автономный режим')
# Рисуем траекторию БПЛА (пунктирная линия, тонкая)
if len(self.drone_trajectory_x) > 1:
self.ax_global_map.plot(self.drone_trajectory_x, self.drone_trajectory_y,
'g--', linewidth=1, alpha=0.7, label='Данные по одометрии')
# Рисуем начальную точку (зеленая)
self.ax_global_map.plot(self.trajectory_x[0], self.trajectory_y[0], 'go', markersize=8, label='Начальная точка')
# Рисуем текущую позицию (черная)
self.ax_global_map.plot(self.current_x, self.current_y, 'ko', markersize=6, label='Текущая позиция')
# Рисуем целевую точку (0, 0) - желтая
self.ax_global_map.plot(0, 0, 'yo', markersize=8, label='Цель (0, 0)')
self.ax_global_map.legend()
# Автоматически масштабируем оси
if len(self.trajectory_x) > 0:
margin = 50
x_min, x_max = min(self.trajectory_x), max(self.trajectory_x)
y_min, y_max = min(self.trajectory_y), max(self.trajectory_y)
# Учитываем также траекторию БПЛА при масштабировании
if len(self.drone_trajectory_x) > 0:
x_min = min(x_min, min(self.drone_trajectory_x))
x_max = max(x_max, max(self.drone_trajectory_x))
y_min = min(y_min, min(self.drone_trajectory_y))
y_max = max(y_max, max(self.drone_trajectory_y))
x_min = min(x_min, 0)
x_max = max(x_max, 0)
y_min = min(y_min, 0)
y_max = max(y_max, 0)
self.ax_global_map.set_xlim(x_min - margin, x_max + margin)
self.ax_global_map.set_ylim(y_min - margin, y_max + margin)
def update_drone_trajectory(self, drone_x: float, drone_y: float):
"""Обновляет траекторию БПЛА (его собственное видение позиции)"""
self.drone_trajectory_x.append(drone_x)
self.drone_trajectory_y.append(drone_y)
def update_error_plot(self, frame_count: int, drone_x: float, drone_y: float, true_x: float, true_y: float):
"""Обновляет график погрешности позиции"""
# Вычисляем погрешность как расстояние между реальной и предполагаемой позицией
error = np.sqrt((drone_x - true_x)**2 + (drone_y - true_y)**2)
self.error_times.append(frame_count)
self.position_errors.append(error)
self.ax_error_plot.clear()
self.ax_error_plot.set_title('Погрешность позиции от времени')
self.ax_error_plot.set_xlabel('Время (кадры)')
self.ax_error_plot.set_ylabel('Погрешность (метры)')
self.ax_error_plot.grid(True, alpha=0.3)
if len(self.error_times) > 1:
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-', linewidth=2)
# Автоматически масштабируем оси
if len(self.position_errors) > 0:
margin = 0.1
error_min, error_max = min(self.position_errors), max(self.position_errors)
if error_max > error_min:
self.ax_error_plot.set_ylim(0, error_max + margin)
else:
self.ax_error_plot.set_ylim(0, 1)
def update_detection(self, image: np.ndarray, keypoints):
"""Обновляет визуализацию детекции ключевых точек"""
self.current_frame = image.copy()
self.keypoints = keypoints
self.ax_detection.clear()
self.ax_detection.set_title('Keypoint Detection')
if image is not None:
# Конвертируем BGR в RGB для matplotlib
if len(image.shape) == 3 and image.shape[2] == 3:
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
else:
image_rgb = image
self.ax_detection.imshow(image_rgb)
# Рисуем ключевые точки
if keypoints:
kp_coords = np.array([kp.pt for kp in keypoints])
self.ax_detection.scatter(kp_coords[:, 0], kp_coords[:, 1],
c='red', s=20, alpha=0.7, marker='o')
self.ax_detection.axis('off')
def update_matches(self, img1: np.ndarray, img2: np.ndarray,
kp1, kp2, matches, transformation_info=None):
"""Обновляет визуализацию сопоставления точек"""
self.ax_matches.clear()
self.ax_matches.set_title('Feature Matching')
if img1 is not None and img2 is not None and matches:
# Рисуем сопоставления
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches, None,
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
# Конвертируем BGR в RGB
if len(img_matches.shape) == 3 and img_matches.shape[2] == 3:
img_matches_rgb = cv2.cvtColor(img_matches, cv2.COLOR_BGR2RGB)
else:
img_matches_rgb = img_matches
self.ax_matches.imshow(img_matches_rgb)
# Добавляем информацию о трансформации
if transformation_info:
tx, ty = transformation_info['translation']
angle = transformation_info['rotation']
info_text = f"Translation: ({tx:.2f}, {ty:.2f})"
info_text2 = f"Rotation: {angle:.2f} rad ({np.degrees(angle):.1f}°)"
self.ax_matches.text(10, 30, info_text, fontsize=8, color='green',
bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
self.ax_matches.text(10, 90, info_text2, fontsize=8, color='green',
bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
self.ax_matches.axis('off')
def update_display(self):
"""Обновляет отображение всех областей"""
self.fig.canvas.draw()
self.fig.canvas.flush_events()
plt.pause(0.01)
def close(self):
"""Закрывает окно"""
plt.close(self.fig)
def show_final(self):
"""Показывает финальное состояние окна"""
plt.ioff()
print("Симуляция завершена. Окно визуализации остается открытым для анализа.")
plt.pause(100000)
def pause(self, duration: float):
plt.pause(duration)

164
position.py Normal file
View File

@@ -0,0 +1,164 @@
import cv2
import math
import numpy as np
from numpy.linalg import inv
from typing import Optional
import constants
from scipy.spatial.transform import Rotation
import utility
class Position:
"""Класс позиции с полной ориентацией БПЛА в 3D пространстве"""
x: float # Координата X
y: float # Координата Y
z: float # Масштаб
yaw: float # Рыскание (rotation around Z-axis)
pitch: float # Тангаж (rotation around Y-axis)
roll: float # Крен (rotation around X-axis)
def __init__(
self,
x: float = 0,
y: float = 0,
z: float = 1,
yaw: float = 0,
pitch: float = 0,
roll: float = 0
):
self.x = x
self.y = y
self.z = z
self.yaw = yaw
self.pitch = pitch
self.roll = roll
def __str__(self) -> str:
return (
f"Position(x={self.x:.2f}, y={self.y:.2f}, z={self.z:.2f}, "
f"yaw={math.degrees(self.yaw):.1f}°, "
f"pitch={math.degrees(self.pitch):.1f}°, "
f"roll={math.degrees(self.roll):.1f}°)"
)
def __imul__(self, scalar: float):
self.x *= scalar
self.y *= scalar
self.z *= scalar
return self
def __mul__(self, scalar: float) -> 'Position':
pos = self.copy()
pos *= scalar
return pos
def __itruediv__(self, scalar: float):
self.x /= scalar
self.y /= scalar
self.z /= scalar
return self
def __truediv__(self, scalar: float) -> 'Position':
pos = self.copy()
pos /= scalar
return pos
def get_homography_matrix(self, K_in: np.ndarray = constants.K, K_out: np.ndarray | None = None, sliding: bool = True) -> np.ndarray:
""" Возвращает матрицу гомографии """
R = self.get_rotation_matrix()
T = self.get_translation_matrix(K_in)
if not sliding:
T[0, 2] = T[1, 2] = 0
if K_out is None: K_out = K_in
return K_out @ R @ T @ np.linalg.inv(K_in)
def copy(self) -> 'Position':
"""Создает полную копию объекта"""
return Position(self.x, self.y, self.z, self.yaw, self.pitch, self.roll)
def get_translation_matrix(self, K: np.ndarray = constants.K) -> np.ndarray:
return np.array([
[1, 0, self.x / K[0][0]],
[0, 1, self.y / K[0][0]],
[0, 0, self.z]
])
def get_rotation_matrix(self) -> np.ndarray:
"""
Матрица вращения с порядком применения: yaw → pitch → roll
Умножение: R = Rx(roll) * Ry(pitch) * Rz(yaw)
"""
cy, sy = math.cos(self.yaw), math.sin(self.yaw)
cp, sp = math.cos(self.pitch), math.sin(self.pitch)
cr, sr = math.cos(self.roll), math.sin(self.roll)
Rz = np.array([
[cy, -sy, 0],
[sy, cy, 0],
[0, 0, 1],
])
Ry = np.array([
[cp, 0, sp],
[0, 1, 0],
[-sp, 0, cp],
])
Rx = np.array([
[1, 0, 0],
[0, cr, -sr],
[0, sr, cr],
])
return Rx @ Ry @ Rz
def iapply(self, homography_matrix: np.ndarray, K = constants.K) -> 'Position':
"""Применяет матрицу трансформации для вычисления новой позиции и ориентации."""
np.set_printoptions(suppress=True)
H = homography_matrix @ self.get_homography_matrix(sliding=False)
# Decompose homography
_, R, t, _ = cv2.decomposeHomographyMat(H, K)
R = np.array(R)
t = np.array(t)
# print(cv2.decomposeHomographyMat(inv(H), K))
# _, _, z, _ = cv2.decomposeHomographyMat(inv(H), K)
# print(z)
# z = z.copy()
# z *= constants._K_FOCUS_DISTANCE
# print(z)
T = inv(R) @ inv(K) @ H @ K
ind = np.array([A[2][0] ** 2 + A[2][1] ** 2 for A in T])
top_k = max(1, len(T) // 2)
if (len(T) == 3): raise "len(T) == 3"
ind = np.argpartition(ind, top_k - 1)[:top_k]
T = T[ind[0]]
T = T @ np.array([0, 0, 1]) / np.mean((T[0][0], T[1][1]))
T[2] -= 1
R = R[ind]
t = t[ind]
best_id = ((t - T) ** 2).sum((1, 2)).argmin()
R = R[best_id]
rot = Rotation.from_matrix(R).as_euler('XYZ').flatten()
self.roll = min(np.radians(5), max(np.radians(-5), rot[0]))
self.pitch = min(np.radians(5), max(np.radians(-5), rot[1]))
self.yaw = rot[2]
t = t[best_id].flatten()
self.x -= T[0] * constants._K_FOCUS_DISTANCE
self.y += T[1] * constants._K_FOCUS_DISTANCE
self.z = max(0.7, min(1.3, 1 + T[2]))
T[0] *= constants._K_FOCUS_DISTANCE
T[1] *= constants._K_FOCUS_DISTANCE
def apply(self, homography_matrix: np.ndarray, K = constants.K) -> 'Position':
"""Применяет матрицу трансформации для вычисления новой позиции и ориентации."""
pos = self.copy()
pos.iapply(homography_matrix, K)
return pos

View File

@@ -1,113 +1,99 @@
import math
import os
from io import BytesIO
from time import sleep
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from autopilot import Pilot
from geolocation import Geolocation
import cv2
import numpy as np
from position import Position
from vision_chunk import VisionChunk
from visualization import VisualizationManager, SimMode
from yandex_map import YandexMap
from PIL import Image
import os
from google_map import GoogleMap
import constants
import utility
class Simulator:
geo: Geolocation
yandexMap: YandexMap
def __init__(self, online_map: YandexMap | GoogleMap = None):
self.online_map = online_map
# Используем новый конструктор с yaw, pitch, roll
self.pos = Position(x=0, y=0, z=1, yaw=0, pitch=0, roll=0)
# Менеджер визуализации
viz_manager: VisualizationManager
def __init__(self, yandexMap: YandexMap = None):
self.yandexMap = yandexMap
# Инициализация переменных для отслеживания траектории
self.geo = Geolocation(0, 0, 1, 0)
# Создаем папку для изображений, если её нет
os.makedirs('./images', exist_ok=True)
def rotate_image_like_drone(self, image: Image.Image, angle: float) -> Image.Image:
"""
Поворачивает картинку как будто съемка ведется с летящего дрона.
Выделяет концентрический квадрат, поворачивает его и извлекает результат.
"""
# Получаем размеры изображения
width, height = image.size
square_size = min(width, height)
off_x = (width - square_size) / 2
off_y = (height - square_size) / 2
def set_pitch(self, pitch_deg: float):
"""Установить тангаж камеры (градусы, -10 до 10)"""
clamped_pitch = max(-10, min(10, pitch_deg))
self.pos.pitch = math.radians(clamped_pitch)
cropped_image = image.crop((off_x, off_y, off_x + square_size, off_y + square_size))
cropped_image = cropped_image.rotate(angle / math.pi * 180, expand=True)
# Определяем размер концентрического квадрата (80% от минимальной стороны)
local_square_size = int(square_size / 2 ** 0.5)
# Вычисляем координаты для центрирования квадрата
left = (cropped_image.width - local_square_size) // 2
top = (cropped_image.height - local_square_size) // 2
right = left + local_square_size
bottom = top + local_square_size
# Вырезаем концентрический квадрат
final_image = cropped_image.crop((left, top, right, bottom))
def set_roll(self, roll_deg: float):
"""Установить крен камеры (градусы, -10 до 10)"""
clamped_roll = max(-10, min(10, roll_deg))
self.pos.roll = math.radians(clamped_roll)
return final_image
def _apply_perspective_transform(self, image: Image.Image) -> Image.Image:
"""
Применяет перспективную трансформацию к изображению.
Возвращает квадратное изображение 700x700.
"""
img_array = np.array(image)
h, w, _ = img_array.shape
# Применяем трансформацию
pos = self.pos.copy()
pos.x = 0
pos.y = 0
K_in = utility.calc_camera_matrix(w, h)
K_out = constants.K
H = pos.get_homography_matrix(K_in, K_out)
shape = (constants.CHUNK_WIDTH, constants.CHUNK_WIDTH)
transformed = cv2.warpPerspective(img_array, H, shape)
return Image.fromarray(transformed)
def update_trajectory(self, dx: float, dy: float):
"""
Обновляет траекторию полета беспилотника
"""
# Обновляем текущие координаты
self.geo.x += dx * self.geo.z
self.geo.y += dy * self.geo.z
def update_map(self):
"""
Обновляет карту траектории полета
"""
self.viz_manager.update_global_map(self.geo.x, self.geo.y, self.mode)
"""Обновляет координаты дрона"""
self.pos.x += dx
self.pos.y += dy
def handle(self, dangle: float, velocity: float = 50) -> None:
""" Сдвиг камеры """
html = self.yandexMap.driver.find_element(By.TAG_NAME, 'html')
"""
Управление движением дрона.
dangle - изменение угла курса (радианы)
velocity - скорость движения
"""
action = ActionChains(self.yandexMap.driver)
action.move_to_element_with_offset(html, 200, 200)
action.click_and_hold()
self.geo.angle += dangle
# print(f" [Simulator] angle: {self.angle / math.pi * 180:.1f}°")
# Обновляем yaw в объекте Position
self.pos.yaw += dangle
velocity = max(velocity, 10)
dx = math.cos(math.pi / 2 + self.geo.angle) * velocity / self.geo.z
dy = math.sin(math.pi / 2 + self.geo.angle) * velocity / self.geo.z
# print(" [Simulator] dx, dy:", [dx, dy])
# Вычисляем смещение на основе текущего yaw
dx = int(math.cos(math.pi / 2 + self.pos.yaw) * velocity)
dy = int(math.sin(math.pi / 2 + self.pos.yaw) * velocity)
self.update_trajectory(dx, dy)
action.move_by_offset(-dx, dy)
action.release()
action.perform()
# print(f" [Simulator] Position: {self.current_x}, {self.current_y}")
def change_zoom(self, direction: str = 'down'):
""" Изменение масштаба """
self.yandexMap.scroll(0.5, 0.5, 2, direction == 'down')
sleep(0.4)
if direction == 'down':
self.geo.z /= 2
else:
self.geo.z *= 2
self.online_map.move(dx, dy)
def set_zoom(self, zoom_level: float):
"""Программное изменение масштаба"""
self.pos.z = zoom_level
def get_chunk(self) -> VisionChunk:
png = self.yandexMap.driver.get_screenshot_as_png()
im = Image.open(BytesIO(png))
"""Получить текущий снимок с камеры дрона"""
im = self.online_map.make_screenshot()
# Применяем поворот как будто съемка с дрона
rotated_im = self.rotate_image_like_drone(im, -self.geo.angle)
return VisionChunk(rotated_im)
# Применяем перспективную трансформацию
transformed_im = self._apply_perspective_transform(im)
return VisionChunk(transformed_im)
def get_orientation_info(self) -> dict:
"""Получить полную информацию об ориентации дрона"""
return {
'position': (self.pos.x, self.pos.y, self.pos.z),
'yaw_deg': math.degrees(self.pos.yaw),
'pitch_deg': math.degrees(self.pos.pitch),
'roll_deg': math.degrees(self.pos.roll),
'rotation_matrix': self.pos.get_rotation_matrix()
}

File diff suppressed because one or more lines are too long

189
test_chunks.ipynb Normal file

File diff suppressed because one or more lines are too long

1004
test_projection.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@@ -29,3 +29,9 @@ class Timer:
self.elapsed = 0.
self.enabled = False
self.last_enabled = 0.
def loop(self) -> float:
v = self.get_diff()
self.stop()
self.start()
return v

24
todo.md
View File

@@ -2,16 +2,16 @@
[!] Проверка корректности выявления ориентира на кадре
[!] Исправление коррекции координат на основе сопоставления с ориентиром
[-] FPS счетчик
| [-] Оптимизация детекции точек
[-] Оформление статистики при тестовых запусках
[-] Проведение тестовых запусков
[-] Оформление отчета
[-] Эксперименты с разными детекторами (SIFT, KAZE)
[+] FPS счетчик
| [+] Оптимизация детекции точек
[+] Оформление статистики при тестовых запусках
[+] Проведение тестовых запусков
[+] Оформление отчета
[+] Эксперименты с разными детекторами (SIFT, KAZE)
[?] Изменение масштаба во время полёта, обработка этой трансформации
[?] Поворот ориентиров
[?] Ограничение выбора точек при построении маршрута, чтобы ориентиры полностью попадали в кадр
[+] Изменение масштаба во время полёта, обработка этой трансформации
[+] Поворот ориентиров
[+] Ограничение выбора точек при построении маршрута, чтобы ориентиры полностью попадали в кадр
[+] График межкадрового смещения
| [+] График межкадровых смещениях по версии матрицы гомографии
@@ -19,6 +19,6 @@
| [+] Исследовать причину погрешности при развороте
[+] Устранение большой погрешности при повороте
[ ] Переделать ключевые точки -> Optical Flow
[ ] Добавить перспективу
[ ] Эталоны на Google Maps, полёт тот же
[+] Переделать ключевые точки -> Optical Flow
[+] Добавить перспективу
[+] Эталоны на Google Maps, полёт тот же

View File

@@ -1,6 +1,11 @@
from PIL import Image
from datetime import datetime
from urllib.parse import parse_qs, urlparse, unquote
import constants
import cv2
import numpy as np
import re
def cv2_to_pil(cv_image: np.ndarray) -> Image.Image:
"""
@@ -12,4 +17,153 @@ def cv2_to_pil(cv_image: np.ndarray) -> Image.Image:
def image_to_numpy(self, image: Image.Image) -> np.ndarray:
"""Конвертирует PIL Image в numpy array для OpenCV"""
return np.array(image)
return np.array(image)
def get_normals(H: np.ndarray, R: np.ndarray, T: np.ndarray) -> np.ndarray:
n = cv2.decomposeHomographyMat(H, constants.K, R, T)
return n
def estimate_transformation_matrix(src_pts: np.ndarray, dst_pts: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""Оценивает матрицу трансформации на основе сопоставленных точек"""
# Используем RANSAC для оценки матрицы гомографии
return cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0, maxIters=1000)
def calc_camera_matrix(w: float, h: float):
f = constants._K_FOCUS_DISTANCE
return np.array([
[f, 0, w / 2],
[0, f, h / 2],
[0, 0, 1]
])
def generate_folder_name():
"""
Генерирует название для папки с текущей датой и временем до секунд.
Формат: YYYY-MM-DD_HH-MM-SS
"""
now = datetime.now()
folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
return folder_name
def parse_yandex_maps_url(url):
"""
Парсит URL Яндекс.Карт и извлекает lat, lon и zoom
Формат: ?ll=lon,lat&z=zoom
"""
# Декодируем URL (на случай %2C вместо запятых)
url = unquote(url)
# Парсим URL
parsed_url = urlparse(url)
params = parse_qs(parsed_url.query)
if 'll' in params and 'z' in params:
# ll содержит "lon,lat"
ll_value = params['ll'][0]
lat, lon = map(float, ll_value.split(','))
zoom = int(params['z'][0])
return {
'lat': lat,
'lon': lon,
'zoom': zoom
}
return None
def parse_google_maps_url(url):
"""
Парсит URL Google Maps и извлекает lat, lon и zoom
Формат: /@lat,lon,zoom[m|z]
"""
pattern = r'/@([-\d.]+),([-\d.]+),(\d+)([mz])'
match = re.search(pattern, url)
if match:
lon = float(match.group(1))
lat = float(match.group(2))
zoom_value = int(match.group(3))
zoom_unit = match.group(4)
return {
'lat': lat,
'lon': lon,
'zoom': zoom_value,
'zoom_unit': zoom_unit
}
return None
def google_map_js_move_script(dx, dy) -> str:
return """
async function sleep(ms) {
return new Promise((resolve, reject) => {
setTimeout(() => resolve(), ms);
});
}
async function simulateDrag(element, offsetX, offsetY) {
const rect = element.getBoundingClientRect();
const startX = rect.left + rect.width / 2;
const startY = rect.top + rect.height / 2;
const step = 10
const endX = startX + offsetX + step;
const endY = startY + offsetY + step;
element.dispatchEvent(new MouseEvent('mousedown', {
view: window,
bubbles: true,
cancelable: true,
clientX: startX,
clientY: startY,
button: 0
}));
let currentX = startX;
let currentY = startY;
function move(stepX, stepY) {
currentX += stepX;
currentY += stepY;
document.dispatchEvent(new MouseEvent('mousemove', {
view: window,
bubbles: true,
cancelable: false,
clientX: currentX,
clientY: currentY
}));
}
await sleep(50);
move(step, step)
while (currentX != endX || currentY != endY) {
await sleep(50);
const stepX = Math.min(step, Math.max(-step, endX - currentX));
const stepY = Math.min(step, Math.max(-step, endY - currentY));
move(stepX, stepY);
}
await sleep(50)
document.dispatchEvent(new MouseEvent('mouseup', {
view: window,
bubbles: true,
cancelable: false,
clientX: endX,
clientY: endY,
button: 0
}));
}
{
const canvas = document.querySelector('canvas.H1VXrf.JRr1M.DnOnV');
""" + f"simulateDrag(canvas, {int(-dx)}, {int(dy)});" + """
}
"""

View File

@@ -1,10 +1,13 @@
import constants
import cv2
import json
import numpy as np
from PIL import Image
from dataclasses import dataclass, field
from pathlib import Path
from position import Position
from timer import Timer
from typing import Literal, Optional, Tuple
from PIL import Image
FeatureMethod = Literal["orb", "sift", "akaze", "brisk"]
DEFAULT_METHOD = "orb"
@@ -14,6 +17,7 @@ class VisionChunk:
image: Image.Image
feature_method: FeatureMethod = DEFAULT_METHOD
pos: Optional[Position] = field(default=None, init=False)
keypoints: Optional[list] = field(default=None, init=False)
descriptors: Optional[np.ndarray] = field(default=None, init=False)
_detector: Optional[cv2.Feature2D] = field(default=None, init=False, repr=False)
@@ -27,12 +31,12 @@ class VisionChunk:
self._detector = cv2.ORB_create(
nfeatures=1000,
scaleFactor=1.2,
nlevels=32,
nlevels=16,
edgeThreshold=31,
firstLevel=0,
WTA_K=2,
patchSize=31,
fastThreshold=20,
fastThreshold=10,
)
elif self.feature_method == "sift":
self._detector = cv2.SIFT_create(
@@ -70,30 +74,55 @@ class VisionChunk:
return self._matcher
def _preprocess(self, img_np: np.ndarray) -> np.ndarray:
"""CLAHE предобработка для улучшения контраста"""
"""Предобработка для улучшения сопоставления между снимками разного времени"""
if len(img_np.shape) == 3:
gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
else:
gray = img_np
# Гауссовское размытие для подавления шума и мелких различий
# blurred = cv2.GaussianBlur(gray, (5, 5), 1.0)
# CLAHE для выравнивания контраста между снимками
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
return clahe.apply(gray)
enhanced = clahe.apply(gray)
# Опционально: нормализация гистограммы для устранения различий в освещении
normalized = cv2.normalize(enhanced, None, 0, 255, cv2.NORM_MINMAX)
return normalized
def compute_keypoints(self, force: bool = False) -> Tuple[list[cv2.KeyPoint], Optional[np.ndarray]]:
if self.keypoints is not None and self.descriptors is not None and not force:
return self.keypoints, self.descriptors
timer = Timer()
timer.start()
detector = self._get_detector()
if constants.DEBUG_FPS:
print(f"[VC-DETECTION]: get_detector: {timer.loop() * 1000:.2f} ms")
# PIL -> OpenCV (RGB->BGR)
img_np = np.array(self.image)
if img_np.ndim == 3 and img_np.shape[2] == 3:
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
if constants.DEBUG_FPS:
print(f"[VC-DETECTION]: converting: {timer.loop() * 1000:.2f} ms")
# CLAHE предобработка
preprocessed = self._preprocess(img_np)
if constants.DEBUG_FPS:
print(f"[VC-DETECTION]: preprocess: {timer.loop() * 1000:.2f} ms")
keypoints, descriptors = detector.detectAndCompute(preprocessed, None)
if constants.DEBUG_FPS:
print(f"[VC-DETECTION]: detect and compute: {timer.loop() * 1000:.2f} ms")
# Получаем массив response для всех точек
responses = np.array([kp.response for kp in keypoints])
@@ -104,6 +133,9 @@ class VisionChunk:
best_keypoints = [keypoints[i] for i in top_indices]
best_descriptors = descriptors[top_indices]
if constants.DEBUG_FPS:
print(f"[VC-DETECTION]: filtration: {timer.loop() * 1000:.2f} ms")
self.keypoints = best_keypoints
self.descriptors = best_descriptors
return self.keypoints, self.descriptors
@@ -122,15 +154,29 @@ class VisionChunk:
Возвращает: src_pts, dst_pts, good_matches, kp1, kp2 (отцентрированные координаты)
"""
# Вычисляем keypoints для обоих
timer = Timer()
timer.start()
kp1, des1 = self.compute_keypoints()
if constants.DEBUG_FPS:
print(f"[VC-KEYPOINTS]: computing 1: {timer.loop() * 1000:.2f} ms")
kp2, des2 = other.compute_keypoints()
if constants.DEBUG_FPS:
print(f"[VC-KEYPOINTS]: computing 2: {timer.loop() * 1000:.2f} ms")
if des1 is None or des2 is None or len(kp1) < 4 or len(kp2) < 4:
return None, None, None, None, None
# kNN matching + Lowe ratio test
matcher = self._get_matcher()
matches_knn = matcher.knnMatch(des1, des2, k=2)
if constants.DEBUG_FPS:
print(f"[VC-KEYPOINTS]: matching: {timer.loop() * 1000:.2f} ms")
good_matches: list[cv2.DMatch] = []
for m_n in matches_knn:
@@ -147,28 +193,22 @@ class VisionChunk:
if len(good_matches) < 4:
return None, None, None, None, None
# Центр изображений
img1_cv = self.to_cv2_gray()
img2_cv = other.to_cv2_gray()
h1, w1 = img1_cv.shape
h2, w2 = img2_cv.shape
cx1, cy1 = w1 // 2, h1 // 2
cx2, cy2 = w2 // 2, h2 // 2
# Отцентрированные координаты (x_rel, y_rel)
src_pts = []
dst_pts = []
for match in good_matches:
pt1 = kp1[match.queryIdx].pt
src_pts.append([pt1[0] - cx1, cy1 - pt1[1]]) # Y вверх!
src_pts.append([pt1[0], pt1[1]])
pt2 = kp2[match.trainIdx].pt
dst_pts.append([pt2[0] - cx2, cy2 - pt2[1]])
dst_pts.append([pt2[0], pt2[1]])
src_pts = np.float32(src_pts).reshape(-1, 1, 2)
dst_pts = np.float32(dst_pts).reshape(-1, 1, 2)
if constants.DEBUG_FPS:
print(f"[VC-KEYPOINTS]: filtration: {timer.loop() * 1000:.2f} ms")
return src_pts, dst_pts, good_matches, kp1, kp2
def to_cv2_gray(self) -> np.ndarray:

View File

@@ -3,14 +3,16 @@
Модуль для управления общим окном визуализации
"""
from PIL import Image
from enum import Enum
from scipy.interpolate import make_interp_spline
import cv2
import matplotlib
import matplotlib.axes
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from enum import Enum
import cv2
from PIL import Image
import matplotlib
# Настройки matplotlib
matplotlib.use('TkAgg')
@@ -93,14 +95,14 @@ class VisualizationManager:
self.ax_matches.axis('off')
# Сопоставление точек (средний средний угол)
self.ax_chunk_matches = self.fig.add_subplot(gs[1, 2])
self.ax_chunk_matches = self.fig.add_subplot(gs[1, 1:3])
self.ax_chunk_matches.set_title('Chunk Matching')
self.ax_chunk_matches.axis('off')
# Визуализация движения ключевых точек (левый нижний угол)
self.ax_motion_vectors = self.fig.add_subplot(gs[1, 1])
self.ax_motion_vectors.set_title('Motion Vectors - Движение ключевых точек')
self.ax_motion_vectors.axis('off')
# self.ax_motion_vectors = self.fig.add_subplot(gs[1, 1])
# self.ax_motion_vectors.set_title('Motion Vectors - Движение ключевых точек')
# self.ax_motion_vectors.axis('off')
# Визуализация движения ключевых точек на основе матрицы гомографии
self.ax_motion_gomography = self.fig.add_subplot(gs[0, 1])
@@ -157,7 +159,7 @@ class VisualizationManager:
# Рисуем текущую целевую точку
if self.target_idx < len(self.target_pts):
pt = self.target_pts[self.target_idx]
self.ax_global_map.plot(pt[0], pt[1], 'yo', markersize=8, label='Цель (0, 0)')
self.ax_global_map.plot(pt[0], pt[1], 'yo', markersize=8, label='Цель')
self.ax_global_map.legend()
@@ -208,7 +210,37 @@ class VisualizationManager:
self.ax_error_plot.grid(True, alpha=0.3)
if len(self.error_times) > 1:
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-', linewidth=2)
# Оригинальный график (более прозрачный)
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-',
linewidth=1, alpha=0.4, label='Погрешность данных')
if len(self.error_times) > 5:
# Сглаженный график
smoothed_times = np.linspace(self.error_times[0], self.error_times[-1], 300)
spl = make_interp_spline(self.error_times, self.position_errors, k=3)
smoothed_errors = spl(smoothed_times)
self.ax_error_plot.plot(smoothed_times, smoothed_errors, 'orange',
linewidth=2, label='Сглаженный тренд')
# if len(self.position_errors) > 5: # Достаточно данных для сглаживания
# window_size = min(11, len(self.position_errors) // 3) # Адаптивный размер окна
# if window_size % 2 == 0: # Должен быть нечетным
# window_size += 1
# # Метод скользящего среднего
# smoothed_errors = np.convolve(
# self.position_errors,
# np.ones(window_size) / window_size,
# mode='valid'
# )
# # Корректируем временную ось для сглаженных данных
# offset = (window_size - 1) // 2
# smoothed_times = self.error_times[offset:offset + len(smoothed_errors)]
self.ax_error_plot.legend(loc='upper right')
# Автоматически масштабируем оси
if len(self.position_errors) > 0:
@@ -219,6 +251,7 @@ class VisualizationManager:
else:
self.ax_error_plot.set_ylim(0, 1)
def update_matches(self, img1: np.ndarray, img2: np.ndarray,
kp1, kp2, matches, transformation_info=None):
"""Обновляет визуализацию сопоставления точек"""
@@ -407,7 +440,6 @@ class VisualizationManager:
# Получаем размеры изображения и центр
height, width = current_frame.shape[:2]
center_x, center_y = width // 2, height // 2
# Создаем сетку точек с заданным шагом
grid_points = []
@@ -424,8 +456,8 @@ class VisualizationManager:
grid_points_centered = []
for pt in grid_points:
# Отцентрируем координаты точно так же, как в detect_and_match_keypoints
centered_x = pt[0] - center_x
centered_y = center_y - pt[1] # Инвертируем Y (изображение Y направлен вниз)
centered_x = pt[0]
centered_y = pt[1]
grid_points_centered.append([centered_x, centered_y])
grid_points_centered = np.array(grid_points_centered, dtype=np.float32)
@@ -442,8 +474,8 @@ class VisualizationManager:
transformed_points = []
for pt in transformed_points_centered:
# Обратное преобразование от центрированных координат к координатам изображения
img_x = pt[0] + center_x
img_y = center_y - pt[1] # Инвертируем Y обратно
img_x = pt[0]
img_y = pt[1] # Инвертируем Y обратно
transformed_points.append([img_x, img_y])
transformed_points = np.array(transformed_points, dtype=np.float32)

View File

@@ -1,32 +1,41 @@
import math
from io import BytesIO
from time import sleep
import os
import cv2
import numpy as np
from PIL import Image
from io import BytesIO
from selenium.webdriver.common.actions.wheel_input import ScrollOrigin
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
def generateURL(lat=49.103814, lon=55.794258, zoom=10):
import constants
import cv2
import math
import numpy as np
import os
import utility
def generateURL(lat: float, lon: float, zoom: int):
return f"https://yandex.ru/maps/43/kazan/?l=sat&ll={lat}%2C{lon}&z={zoom}"
class YandexMap:
def __init__(self):
initial_zoom: int
initial_lat: float
initial_lon: float
def __init__(self, initial_lat=49.103814, initial_lon=55.794258, initial_zoom=10):
self.initial_lat = initial_lat
self.initial_lon = initial_lon
self.initial_zoom = initial_zoom
self.pixel_ratio = constants.YANDEX_PIXEL_RATIO[self.initial_zoom]
options = webdriver.ChromeOptions()
# options.add_experimental_option("detach", True)
self.driver = webdriver.Chrome(options)
self.driver.get(generateURL())
self.driver.get(generateURL(initial_lat, initial_lon, initial_zoom))
self.driver.maximize_window()
sleep(2)
action = ActionChains(self.driver)
# Закрытие левой панели
action = ActionChains(self.driver)
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
action.perform()
@@ -35,8 +44,25 @@ class YandexMap:
self.driver.execute_script("arguments[0].remove();", self.driver.find_element(By.XPATH, "//nav[@class='map-controls']"))
self.driver.execute_script("arguments[0].remove();", self.driver.find_element(By.XPATH, "//footer"))
self.move(39, -9)
sleep(0.2)
def open(self, lat, lon, zoom):
self.initial_lat = lat
self.initial_lon = lon
self.initial_zoom = zoom
self.pixel_ratio = constants.YANDEX_PIXEL_RATIO[self.initial_zoom]
self.driver.get(generateURL(lat, lon, zoom))
sleep(2)
# Закрытие левой панели
action = ActionChains(self.driver)
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
action.perform()
self.move(39, -9)
def save_photo(self, filename: str) -> bytes:
return self.driver.save_screenshot(filename)
@@ -60,51 +86,26 @@ class YandexMap:
if i != count - 1:
sleep(0.25)
def make_as_center(self, x: float, y: float):
def move(self, dx: float, dy: float):
html = self.driver.find_element(By.TAG_NAME, 'html')
action = ActionChains(self.driver)
action.move_to_element_with_offset(html, 0, 0)
action.click_and_hold()
dx = (x - 0.5) * self.get_size()[0]
dy = (0.5 - y) * self.get_size()[1]
print(dx, dy)
action.move_by_offset(-dx, dy)
action.release()
action.perform()
def make_screenshot(self, x: float, y: float, width: float, height: float) -> cv2.typing.MatLike:
# Сохраняем скриншот
self.save_photo("temp.png")
# Загружаем изображение
image = cv2.imread("temp.png")
if image is None:
raise ValueError("Не удалось загрузить изображение temp.png")
# Получаем размеры исходного изображения
img_height, img_width = image.shape[:2]
# Преобразуем относительные координаты в абсолютные пиксели
center_x = int(x * img_width)
center_y = int(y * img_height)
crop_width = int(width * img_width)
crop_height = int(height * img_height)
# Вычисляем координаты прямоугольника для кадрирования
x1 = max(0, center_x - crop_width // 2)
y1 = max(0, center_y - crop_height // 2)
x2 = min(img_width, center_x + crop_width // 2)
y2 = min(img_height, center_y + crop_height // 2)
# Проверяем, что прямоугольник имеет положительные размеры
if x2 <= x1 or y2 <= y1:
raise ValueError("Некорректные размеры для кадрирования")
# Кадрируем изображение
cropped_image = image[y1:y2, x1:x2]
# Если нужно вернуть изображение как результат функции:
return cropped_image
def make_as_center(self, x: float, y: float):
dx = (x - 0.5) * self.get_size()[0]
dy = (0.5 - y) * self.get_size()[1]
self.move(dx, dy)
def make_screenshot(self) -> Image.Image:
png = self.driver.get_screenshot_as_png()
im = Image.open(BytesIO(png))
return utility.cv2_to_pil(np.array(im)[:, :])
def get_geolocation(self):
current_url = self.driver.current_url
return utility.parse_yandex_maps_url(current_url)