Merge pull request 'feat/pitch-roll' (#1) from feat/pitch-roll into main
Reviewed-on: #1
This commit was merged in pull request #1.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
.venv
|
||||
__pycache__
|
||||
*.png
|
||||
images
|
||||
trajectories
|
||||
z
|
||||
|
||||
360
autopilot.py
360
autopilot.py
@@ -3,6 +3,7 @@ from pathlib import Path
|
||||
import math
|
||||
import random
|
||||
|
||||
import constants
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
@@ -11,7 +12,9 @@ from timer import Timer
|
||||
|
||||
from vision_chunk import VisionChunk
|
||||
from visualization import VisualizationManager
|
||||
from geolocation import Geolocation
|
||||
from position import Position
|
||||
from constants import CAMERA_HEIGHT
|
||||
from utility import estimate_transformation_matrix
|
||||
|
||||
random.seed(1)
|
||||
|
||||
@@ -41,7 +44,7 @@ class PilotCommand:
|
||||
class AutoPilot(Pilot):
|
||||
|
||||
# Состояние одометрии
|
||||
geo: Geolocation
|
||||
pos: Position
|
||||
|
||||
chunks: list[VisionChunk] # Ориентиры
|
||||
target_idx: int # Текущий ориентир
|
||||
@@ -51,17 +54,19 @@ class AutoPilot(Pilot):
|
||||
prev_chunk: VisionChunk | None
|
||||
frame_count: int
|
||||
vis_manager: VisualizationManager # Менеджер визуализации (опционально)
|
||||
|
||||
# Положение на основе ориентира
|
||||
reserved_geo: Geolocation | None
|
||||
|
||||
def __init__(self, points = [], chunks = [], viz_manager=None):
|
||||
# Положение на основе ориентира
|
||||
reserved_pos: Position | None
|
||||
proccessing_time: float
|
||||
|
||||
def __init__(self, points = [], chunks = [], viz_manager=None, pixel_ratio: float = 1.):
|
||||
self.prev_chunk = None
|
||||
self.geo = Geolocation(0, 0, 1, 0)
|
||||
self.pos = Position(0, 0, 1, 0, 0, 0)
|
||||
self.chunks = chunks
|
||||
self.frame_count = 0
|
||||
self.vis_manager = viz_manager # Менеджер визуализации
|
||||
self.reserved_geo = None
|
||||
self.reserved_pos = None
|
||||
self.pixel_ratio = pixel_ratio
|
||||
|
||||
# Пороговые значения качества сопоставления/гомографии
|
||||
self.min_inliers: int = 12
|
||||
@@ -74,9 +79,10 @@ class AutoPilot(Pilot):
|
||||
self.target_idx = 0
|
||||
|
||||
self.timer = Timer()
|
||||
self.chunk_points = np.array([[chunk.pos.x, chunk.pos.y] for chunk in self.chunks])
|
||||
|
||||
def get_position(self) -> tuple[float, float]:
|
||||
return self.geo.x, self.geo.y
|
||||
return self.pos.x, self.pos.y
|
||||
|
||||
def calculate_optical_flow(self, prev_chunk: VisionChunk, current_chunk: VisionChunk):
|
||||
"""
|
||||
@@ -87,10 +93,9 @@ class AutoPilot(Pilot):
|
||||
current_gray = current_chunk.to_cv2_gray()
|
||||
|
||||
h, w = prev_gray.shape[:2]
|
||||
cx, cy = w // 2, h // 2
|
||||
|
||||
# Создаем сетку точек для отслеживания (аналогично вашему step=20)
|
||||
step = 35
|
||||
step = 20
|
||||
grid_points = []
|
||||
for y in range(step, h - step, step):
|
||||
for x in range(step, w - step, step):
|
||||
@@ -120,136 +125,147 @@ class AutoPilot(Pilot):
|
||||
return None, None
|
||||
|
||||
# Преобразуем в отцентрированные координаты (Y вверх)
|
||||
src_pts = np.float32([[x - cx, cy - y] for x, y in good_old]).reshape(-1, 1, 2)
|
||||
dst_pts = np.float32([[x - cx, cy - y] for x, y in good_new]).reshape(-1, 1, 2)
|
||||
src_pts = np.float32([[x, y] for x, y in good_old]).reshape(-1, 1, 2)
|
||||
dst_pts = np.float32([[x, y] for x, y in good_new]).reshape(-1, 1, 2)
|
||||
|
||||
return src_pts, dst_pts
|
||||
|
||||
def estimate_transformation_matrix(self, src_pts: np.ndarray, dst_pts: np.ndarray):
|
||||
"""
|
||||
Оценивает матрицу трансформации на основе сопоставленных точек
|
||||
Точки уже отцентрированы относительно центра изображения
|
||||
"""
|
||||
# Используем RANSAC для оценки матрицы гомографии
|
||||
print("Count of points: ", len(src_pts), len(dst_pts))
|
||||
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0, maxIters=1000)
|
||||
# RH, rmask = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 1.0)
|
||||
|
||||
if H is None:
|
||||
return None
|
||||
|
||||
# Маска инлайеров
|
||||
inliers = int(mask.sum()) if mask is not None else 0
|
||||
total = int(mask.size) if mask is not None else 0
|
||||
inlier_ratio = (inliers / total) if total > 0 else 0.0
|
||||
|
||||
# Ошибка репроекции по инлайерам (RMSE)
|
||||
rmse = None
|
||||
try:
|
||||
# Проецируем src через H и сравниваем с dst
|
||||
proj = cv2.perspectiveTransform(src_pts, H)
|
||||
errs = np.linalg.norm((proj - dst_pts).reshape(-1, 2), axis=1)
|
||||
if mask is not None and mask.shape[0] == errs.shape[0]:
|
||||
errs = errs[mask.ravel().astype(bool)]
|
||||
if errs.size > 0:
|
||||
rmse = float(np.sqrt(np.mean(errs ** 2)))
|
||||
except Exception:
|
||||
rmse = None
|
||||
|
||||
# Извлекаем параметры трансформации из матрицы гомографии
|
||||
# H = [a11 a12 tx]
|
||||
# [a21 a22 ty]
|
||||
# [0 0 1 ]
|
||||
|
||||
# Масштаб и поворот
|
||||
a11, a12 = H[0, 0], H[0, 1]
|
||||
a21, a22 = H[1, 0], H[1, 1]
|
||||
|
||||
# Смещение (уже отцентрировано)
|
||||
tx, ty = -H[0, 2], -H[1, 2]
|
||||
|
||||
# print(" [Pilot] translate:", tx, ty)
|
||||
|
||||
# Вычисляем угол поворота
|
||||
angle = -np.arctan2(a21, a11)
|
||||
|
||||
# Вычисляем масштаб
|
||||
scale_x = np.sqrt(a11**2 + a21**2)
|
||||
scale_y = np.sqrt(a12**2 + a22**2)
|
||||
scale = (scale_x + scale_y) / 2
|
||||
|
||||
return {
|
||||
'translation': (tx, ty), # Уже отцентрировано
|
||||
'rotation': angle,
|
||||
'scale': scale,
|
||||
'homography': H,
|
||||
'mask': mask,
|
||||
'inliers': inliers,
|
||||
'inliers_ratio': inlier_ratio,
|
||||
'rmse': rmse
|
||||
}
|
||||
|
||||
def update_drone_position(self, transformation_info: dict):
|
||||
def update_drone_position(self, homography_matrix: np.ndarray):
|
||||
"""
|
||||
Обновляет позицию и угол БПЛА на основе трансформации изображения
|
||||
Координаты уже отцентрированы относительно центра изображения
|
||||
"""
|
||||
homography = transformation_info['homography']
|
||||
self.geo @= homography
|
||||
|
||||
if self.reserved_geo is not None:
|
||||
self.reserved_geo @= homography
|
||||
self.pos.iapply(homography_matrix)
|
||||
|
||||
def get_drone_state(self) -> dict:
|
||||
"""
|
||||
Возвращает текущее состояние БПЛА
|
||||
"""
|
||||
return {
|
||||
'x': self.geo.x,
|
||||
'y': self.geo.y,
|
||||
'angle': self.geo.angle,
|
||||
'angle_degrees': math.degrees(self.geo.angle),
|
||||
'x': self.pos.x,
|
||||
'y': self.pos.y,
|
||||
'angle': self.pos.yaw,
|
||||
'angle_degrees': math.degrees(self.pos.yaw),
|
||||
'frame_count': self.frame_count
|
||||
}
|
||||
|
||||
def get_position_by_chunk(self) -> Geolocation | None:
|
||||
# Пытаемся найти ориентир на картинке:
|
||||
def get_position_by_chunk(self) -> Position | None:
|
||||
landmark_timer = Timer()
|
||||
landmark_timer.start()
|
||||
|
||||
cur_pos = np.array([self.pos.x, self.pos.y])
|
||||
closest_chunk_idx = ((self.chunk_points - cur_pos) ** 2).sum(1).argmin()
|
||||
|
||||
current_chunk = self.prev_chunk
|
||||
landmark_chunk = self.chunks[self.target_idx]
|
||||
landmark_chunk = self.chunks[closest_chunk_idx]
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[LANDMARK]: Closest chunk finding: {landmark_timer.loop() * 1000:.2f} ms")
|
||||
|
||||
# Краевой случай: отсутствие чанков
|
||||
if current_chunk is None or landmark_chunk is None:
|
||||
return None
|
||||
|
||||
landmark_timer.start()
|
||||
src_pts, dst_pts, matches, kp1, kp2 = landmark_chunk.detect_and_match_keypoints(current_chunk)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[LANDMARK]: detect and match keypoints: {landmark_timer.loop() * 1000:.2f} ms")
|
||||
|
||||
landmark_timer.stop()
|
||||
|
||||
# Визуализация (если нужна)
|
||||
if src_pts is not None and dst_pts is not None and self.vis_manager:
|
||||
was_enabled = self.timer.enabled
|
||||
if was_enabled: self.timer.stop()
|
||||
self.vis_manager.update_chunk_matches(landmark_chunk.to_numpy(), current_chunk.to_numpy(), kp1, kp2, matches)
|
||||
if was_enabled: self.timer.start()
|
||||
if was_enabled:
|
||||
self.timer.stop()
|
||||
self.vis_manager.update_chunk_matches(
|
||||
landmark_chunk.to_numpy(),
|
||||
current_chunk.to_numpy(),
|
||||
kp1, kp2, matches
|
||||
)
|
||||
if was_enabled:
|
||||
self.timer.start()
|
||||
|
||||
if src_pts is not None and dst_pts is not None:
|
||||
# Оцениваем матрицу трансформации
|
||||
landmark_transform = self.estimate_transformation_matrix(src_pts, dst_pts)
|
||||
# Если ориентир достоверно найден — скорректируем глобальные координаты и угол
|
||||
if landmark_transform is not None:
|
||||
ok_scale = (self.min_scale <= landmark_transform['scale'] <= self.max_scale)
|
||||
ok_inliers = (landmark_transform.get('inliers', 0) >= self.min_inliers)
|
||||
ratio = landmark_transform.get('inliers_ratio', 0.0)
|
||||
ok_ratio = (ratio >= self.min_inlier_ratio)
|
||||
rmse = landmark_transform.get('rmse', None)
|
||||
ok_rmse = (rmse is not None and rmse <= self.max_reproj_rmse)
|
||||
if ok_scale and ok_inliers and ok_ratio and ok_rmse:
|
||||
# print("[HELP]")
|
||||
# print("Matrix", landmark_transform['homography'])
|
||||
# print("Position", self.x, self.y)
|
||||
# print("Position of point", self.points[self.target_idx])
|
||||
# print("[PILOT]", rmse, ratio, ok_rmse)
|
||||
# if False:
|
||||
# Считаем абсолютную позу относительно координат ориентира
|
||||
landmark_world_x, landmark_world_y = self.points[self.target_idx]
|
||||
landmark = Geolocation(landmark_world_x, landmark_world_y, 1, 0)
|
||||
homography = landmark_transform['homography']
|
||||
# homography = np.linalg.inv(homography)
|
||||
print(f" [Pilot] Landmark correction applied (inliers={landmark_transform['inliers']}, ratio={ratio:.2f}, rmse={rmse:.2f})")
|
||||
return landmark @ homography
|
||||
return None
|
||||
landmark_timer.start()
|
||||
# Краевой случай: нет точек или недостаточно матчей
|
||||
if src_pts is None or dst_pts is None:
|
||||
return None
|
||||
|
||||
num_matches = len(src_pts)
|
||||
if num_matches < 20:
|
||||
return None
|
||||
|
||||
# Оценка матрицы гомографии
|
||||
landmark_timer.loop()
|
||||
landmark_transform, mask = estimate_transformation_matrix(src_pts, dst_pts)
|
||||
num_inliers = int(np.sum(mask))
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[LANDMARK]: matrix estimation: {landmark_timer.loop() * 1000:.2f} ms")
|
||||
|
||||
# Краевой случай: матрица не найдена
|
||||
if landmark_transform is None or mask is None:
|
||||
return None
|
||||
|
||||
# === КРИТЕРИИ ПРИНЯТИЯ РЕШЕНИЯ ===
|
||||
|
||||
# 1. Минимальное количество инлайеров (абсолютное)
|
||||
MIN_INLIERS_ABSOLUTE = 6
|
||||
if num_inliers < MIN_INLIERS_ABSOLUTE:
|
||||
return None
|
||||
|
||||
# 2. Процент инлайеров от общего числа матчей
|
||||
inlier_ratio = num_inliers / num_matches
|
||||
|
||||
if constants.DEBUG_LANDMARK:
|
||||
print("[LANDMARK]: inlier_ratio=", inlier_ratio)
|
||||
|
||||
MIN_INLIER_RATIO = 0.6
|
||||
if inlier_ratio < MIN_INLIER_RATIO:
|
||||
return None
|
||||
|
||||
# 3. Проверка качества гомографии (детерминант для выявления вырожденных случаев)
|
||||
det = np.linalg.det(landmark_transform[:2, :2])
|
||||
# Детерминант должен быть близок к 1 (без сильного масштабирования)
|
||||
if abs(det) < 0.1 or abs(det) > 10.0:
|
||||
return None
|
||||
|
||||
# 4. Проверка на валидность перспективного преобразования
|
||||
# Элементы третьей строки не должны быть слишком большими
|
||||
if abs(landmark_transform[2, 0]) > 0.01 or abs(landmark_transform[2, 1]) > 0.01:
|
||||
return None
|
||||
|
||||
# 5. Дополнительная проверка: средняя ошибка репроекции для инлайеров
|
||||
inlier_src = src_pts[mask.ravel() == 1]
|
||||
inlier_dst = dst_pts[mask.ravel() == 1]
|
||||
|
||||
# Преобразуем точки через найденную гомографию
|
||||
transformed_pts = cv2.perspectiveTransform(inlier_src, landmark_transform)
|
||||
|
||||
# Вычисляем ошибку репроекции
|
||||
reprojection_errors = np.sqrt(np.sum((transformed_pts - inlier_dst) ** 2, axis=2))
|
||||
mean_error = np.mean(reprojection_errors)
|
||||
|
||||
MAX_MEAN_REPROJECTION_ERROR = 1.1 # пиксели
|
||||
|
||||
if constants.DEBUG_LANDMARK:
|
||||
print("[LANDMARK]: Mean_error=", mean_error)
|
||||
|
||||
if mean_error > MAX_MEAN_REPROJECTION_ERROR:
|
||||
return None
|
||||
|
||||
# 6. Проверка стабильности: если слишком много хороших совпадений, но мало инлайеров - подозрительно
|
||||
if num_matches > 50 and inlier_ratio < 0.15:
|
||||
return None
|
||||
|
||||
# === ВСЕ ПРОВЕРКИ ПРОЙДЕНЫ ===
|
||||
print("[LANDMARK]: Correction Applied")
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[LANDMARK]: time: {landmark_timer.get_elapsed() * 1000:.2f} ms")
|
||||
|
||||
return landmark_chunk.pos.apply(landmark_transform)
|
||||
|
||||
|
||||
def handle(self, current_chunk: VisionChunk) -> PilotCommand:
|
||||
@@ -260,58 +276,57 @@ class AutoPilot(Pilot):
|
||||
self.prev_chunk = current_chunk
|
||||
self.timer.stop()
|
||||
return PilotCommand(processing_time=self.timer.get_elapsed())
|
||||
|
||||
# Расстояние до цели
|
||||
distance_to_target = math.sqrt(
|
||||
(self.points[self.target_idx][0] - self.geo.x) ** 2 +
|
||||
(self.points[self.target_idx][1] - self.geo.y) ** 2
|
||||
)
|
||||
|
||||
|
||||
# Вычисляем оптический поток для покадрового сравнения
|
||||
optical_flow_timer = Timer()
|
||||
optical_flow_timer.start()
|
||||
matching_timer = Timer()
|
||||
matching_timer.start()
|
||||
src_pts, dst_pts = self.calculate_optical_flow(self.prev_chunk, current_chunk)
|
||||
optical_flow_timer.stop()
|
||||
print(f"Optical flow calculating: {optical_flow_timer.get_elapsed() * 1000:.2f} ms")
|
||||
# src_pts, dst_pts, _, _, _ = self.prev_chunk.detect_and_match_keypoints(current_chunk)
|
||||
matching_timer.stop()
|
||||
print(f"Matching calculating: {matching_timer.get_elapsed() * 1000:.2f} ms")
|
||||
|
||||
# Оцениваем смещение БПЛА
|
||||
if src_pts is not None and dst_pts is not None:
|
||||
# Оцениваем матрицу трансформации
|
||||
|
||||
|
||||
matrix_estimation_timer = Timer()
|
||||
matrix_estimation_timer.start()
|
||||
transformation_info = self.estimate_transformation_matrix(src_pts, dst_pts)
|
||||
homography_matrix, _ = estimate_transformation_matrix(src_pts, dst_pts)
|
||||
matrix_estimation_timer.stop()
|
||||
print(f"Transformation matrix estimating: {matrix_estimation_timer.get_elapsed() * 1000:.2f} ms")
|
||||
|
||||
if transformation_info:
|
||||
# Обновляем позицию и угол БПЛА (одометрия по кадрам)
|
||||
self.update_drone_position(transformation_info)
|
||||
|
||||
print(f"Transformation matrix updating: {matrix_estimation_timer.get_elapsed() * 1000:.2f} ms")
|
||||
|
||||
if homography_matrix is not None:
|
||||
self.update_drone_position(homography_matrix)
|
||||
self.timer.stop()
|
||||
|
||||
|
||||
# Обновляем визуализацию
|
||||
if self.vis_manager:
|
||||
# Используем визуализацию движения точек по сетке
|
||||
homography_matrix = transformation_info['homography']
|
||||
self.vis_manager.update_homography_grid(
|
||||
current_chunk.to_numpy(),
|
||||
homography_matrix,
|
||||
grid_step=85)
|
||||
current_chunk.to_numpy(),
|
||||
homography_matrix,
|
||||
grid_step=35)
|
||||
|
||||
self.timer.start()
|
||||
|
||||
|
||||
chunk_timer = Timer()
|
||||
chunk_timer.start()
|
||||
# Пытаемся найти ориентир на картинке:
|
||||
self.prev_chunk = current_chunk
|
||||
# npos = self.get_position_by_chunk()
|
||||
# if npos is not None:
|
||||
# self.reserved_geo = npos
|
||||
# Для улучшения среднего FPS
|
||||
if self.frame_count % 5 == 0:
|
||||
pos_by_chunk = self.get_position_by_chunk()
|
||||
if pos_by_chunk is not None:
|
||||
self.pos = pos_by_chunk
|
||||
|
||||
chunk_timer.stop()
|
||||
print(f"Chunk timer: {chunk_timer.get_elapsed() * 1000:.2f} ms")
|
||||
command = self.make_command()
|
||||
self.timer.reset()
|
||||
return command
|
||||
|
||||
def make_command(self) -> PilotCommand:
|
||||
# Расстояние до цели
|
||||
distance_to_target = math.sqrt(
|
||||
(self.points[self.target_idx][0] - self.pos.x) ** 2 +
|
||||
(self.points[self.target_idx][1] - self.pos.y) ** 2
|
||||
) * self.pixel_ratio
|
||||
|
||||
if distance_to_target < 35:
|
||||
self.target_idx += 1
|
||||
@@ -320,20 +335,36 @@ class AutoPilot(Pilot):
|
||||
return PilotCommand(stop=True)
|
||||
|
||||
distance_to_target = math.sqrt(
|
||||
(self.points[self.target_idx][0] - self.geo.x) ** 2 +
|
||||
(self.points[self.target_idx][1] - self.geo.y) ** 2
|
||||
(self.points[self.target_idx][0] - self.pos.x) ** 2 +
|
||||
(self.points[self.target_idx][1] - self.pos.y) ** 2
|
||||
)
|
||||
|
||||
if self.reserved_geo is not None:
|
||||
self.geo = self.reserved_geo
|
||||
self.reserved_geo = None
|
||||
angle_trajectory = self.pos.yaw + math.pi / 2
|
||||
|
||||
# Проверка на слепую зону
|
||||
R = 120
|
||||
blind = np.array([
|
||||
[
|
||||
self.pos.x * self.pixel_ratio + R * np.cos(angle_trajectory - np.pi / 2),
|
||||
self.pos.y * self.pixel_ratio + R * np.sin(angle_trajectory - np.pi / 2),
|
||||
],
|
||||
[
|
||||
self.pos.x * self.pixel_ratio + R * np.cos(angle_trajectory + np.pi / 2),
|
||||
self.pos.y * self.pixel_ratio + R * np.sin(angle_trajectory + np.pi / 2),
|
||||
]
|
||||
])
|
||||
|
||||
blind -= self.points[self.target_idx] * self.pixel_ratio
|
||||
blind = np.hypot(blind[:, 0], blind[:, 1])
|
||||
|
||||
print("R: ", blind)
|
||||
if np.min(blind) < R:
|
||||
return PilotCommand(0, 10, False, self.timer.get_elapsed())
|
||||
|
||||
|
||||
|
||||
# Вычисляем угол к цели
|
||||
target_angle = math.atan2(self.points[self.target_idx][1] - self.geo.y, self.points[self.target_idx][0] - self.geo.x)
|
||||
|
||||
angle_trajectory = self.geo.angle + math.pi / 2
|
||||
|
||||
# print("[ANGLE]", angle_trajectory, "->", target_angle)
|
||||
target_angle = math.atan2(self.points[self.target_idx][1] - self.pos.y, self.points[self.target_idx][0] - self.pos.x)
|
||||
|
||||
# Вычисляем разность углов (направление поворота)
|
||||
angle_diff = target_angle - angle_trajectory
|
||||
@@ -343,16 +374,15 @@ class AutoPilot(Pilot):
|
||||
if angle_diff >= math.pi:
|
||||
angle_diff -= 2 * math.pi
|
||||
|
||||
d_r = max(10, min(35., distance_to_target / 2))
|
||||
d_a_limit = d_r / 10 * 0.01
|
||||
d_r = max(5, min(10., distance_to_target / 2))
|
||||
d_a_limit = np.radians(5)
|
||||
|
||||
command = PilotCommand(
|
||||
max(min(d_a_limit, angle_diff), -d_a_limit),
|
||||
d_r, False, self.timer.get_elapsed()
|
||||
)
|
||||
self.timer.reset()
|
||||
return command
|
||||
|
||||
def reset_position(self, x: float = 0.0, y: float = 0.0, angle: float = 0.0):
|
||||
"""Сбрасывает позицию и угол БПЛА"""
|
||||
self.geo = Geolocation(x, y, 1, angle)
|
||||
self.pos = Position(x, y, 1, angle)
|
||||
|
||||
35
constants.py
Normal file
35
constants.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import numpy as np
|
||||
|
||||
# Ширина 1 пикселя в метрах
|
||||
YANDEX_PIXEL_RATIO = {
|
||||
15: 2830 / 1049,
|
||||
18: 350 / 1049,
|
||||
}
|
||||
|
||||
GOOGLE_PIXEL_RATIO = {
|
||||
15: 2766 / 1031,
|
||||
18: 346 / 1031,
|
||||
}
|
||||
|
||||
WINDOW_HEIGHT = 1031
|
||||
|
||||
# Ширина и высота снимка в пикселях
|
||||
CHUNK_WIDTH = 700
|
||||
|
||||
# Высота, с которой произведен снимок (в метрах)
|
||||
CAMERA_HEIGHT = 150
|
||||
|
||||
# Угол обзора
|
||||
CAMERA_FOV = np.radians(90)
|
||||
|
||||
# Матрица параметров камеры
|
||||
_K_FOCUS_DISTANCE = np.tan(CAMERA_FOV / 2) * CHUNK_WIDTH / 2
|
||||
_K_CENTER = CHUNK_WIDTH / 2
|
||||
K = np.array([
|
||||
[_K_FOCUS_DISTANCE, 0, _K_CENTER],
|
||||
[0, _K_FOCUS_DISTANCE, _K_CENTER],
|
||||
[0, 0, 1]
|
||||
])
|
||||
|
||||
DEBUG_FPS: bool = False
|
||||
DEBUG_LANDMARK: bool = False
|
||||
2
datasets/ya_go_maps/.gitignore
vendored
Normal file
2
datasets/ya_go_maps/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
images/homography_cache
|
||||
*.zip
|
||||
43
datasets/ya_go_maps/generate_dataset.py
Normal file
43
datasets/ya_go_maps/generate_dataset.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from ...google_map import GoogleMap
|
||||
from ...simulator import Simulator
|
||||
from ...yandex_map import YandexMap
|
||||
|
||||
LAT_MIN, LAT_MAX = 44.960236, 54.967830
|
||||
LON_MIN, LON_MAX = 53.084167, 58.677977
|
||||
|
||||
def create_new_asset(yandex_map, google_map):
|
||||
folder = Path('dataset_ya_go_maps')
|
||||
|
||||
id = 0
|
||||
print(id)
|
||||
while (folder / f"{id:0{4}}_google.png").exists():
|
||||
id += 1
|
||||
|
||||
google_file = folder / f"{id:0{4}}_google.png"
|
||||
yandex_file = folder / f"{id:0{4}}_yandex.png"
|
||||
|
||||
lat = np.random.rand() * (LAT_MAX - LAT_MIN) + LAT_MIN
|
||||
lon = np.random.rand() * (LON_MAX - LON_MIN) + LON_MIN
|
||||
|
||||
yandex_map.open(lat, lon, 18)
|
||||
google_map.open(lat, lon, 18)
|
||||
|
||||
simulator = Simulator()
|
||||
simulator._apply_perspective_transform(yandex_map.make_screenshot()).save(yandex_file)
|
||||
simulator._apply_perspective_transform(google_map.make_screenshot()).save(google_file)
|
||||
|
||||
def main():
|
||||
folder = Path('dataset_ya_go_maps')
|
||||
if not folder.exists():
|
||||
folder.mkdir()
|
||||
|
||||
yandex_map = YandexMap(initial_zoom=15)
|
||||
google_map = GoogleMap(initial_zoom=15)
|
||||
|
||||
for i in range(4):
|
||||
create_new_asset(yandex_map, google_map)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
330
examples/page.html
Normal file
330
examples/page.html
Normal file
@@ -0,0 +1,330 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Симулятор проекции дрона</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background: #f5f5f5;
|
||||
}
|
||||
.container {
|
||||
display: flex;
|
||||
gap: 30px;
|
||||
align-items: flex-start;
|
||||
}
|
||||
.canvas-wrapper {
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
||||
}
|
||||
canvas {
|
||||
border: 2px solid #333;
|
||||
display: block;
|
||||
}
|
||||
.controls {
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
||||
min-width: 300px;
|
||||
}
|
||||
.control-group {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 5px;
|
||||
font-weight: bold;
|
||||
color: #333;
|
||||
}
|
||||
input[type="range"] {
|
||||
width: 100%;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
.value-display {
|
||||
text-align: right;
|
||||
color: #666;
|
||||
font-size: 14px;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
}
|
||||
.info {
|
||||
background: #e3f2fd;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
margin-top: 20px;
|
||||
font-size: 14px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Симулятор проекции камеры дрона</h1>
|
||||
|
||||
<div class="container">
|
||||
<div class="canvas-wrapper">
|
||||
<canvas id="canvas" width="720" height="720"></canvas>
|
||||
</div>
|
||||
|
||||
<div class="controls">
|
||||
<div class="control-group">
|
||||
<label for="yaw">Поворот (Yaw):</label>
|
||||
<input type="range" id="yaw" min="-180" max="180" value="0" step="1">
|
||||
<div class="value-display"><span id="yaw-value">0</span>°</div>
|
||||
</div>
|
||||
|
||||
<div class="control-group">
|
||||
<label for="pitch">Тангаж (Pitch):</label>
|
||||
<input type="range" id="pitch" min="-90" max="90" value="0" step="0.1">
|
||||
<div class="value-display"><span id="pitch-value">0</span>°</div>
|
||||
</div>
|
||||
|
||||
<div class="control-group">
|
||||
<label for="roll">Крен (Roll):</label>
|
||||
<input type="range" id="roll" min="-90" max="90" value="0" step="0.1">
|
||||
<div class="value-display"><span id="roll-value">0</span>°</div>
|
||||
</div>
|
||||
|
||||
<div class="control-group">
|
||||
<label for="height">Высота (масштаб):</label>
|
||||
<input type="range" id="height" min="0.5" max="1.5" value="1" step="0.01">
|
||||
<div class="value-display"><span id="height-value">1.00</span></div>
|
||||
</div>
|
||||
|
||||
<div class="info">
|
||||
<strong>Координаты углов:</strong>
|
||||
<div id="coordinates" style="margin-top: 10px; font-family: monospace; font-size: 12px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjs/14.8.1/math.js"></script>
|
||||
|
||||
<script>
|
||||
|
||||
const canvas = document.getElementById('canvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
|
||||
// Элементы управления
|
||||
const yawSlider = document.getElementById('yaw');
|
||||
const pitchSlider = document.getElementById('pitch');
|
||||
const rollSlider = document.getElementById('roll');
|
||||
const heightSlider = document.getElementById('height');
|
||||
|
||||
const yawValue = document.getElementById('yaw-value');
|
||||
const pitchValue = document.getElementById('pitch-value');
|
||||
const rollValue = document.getElementById('roll-value');
|
||||
const heightValue = document.getElementById('height-value');
|
||||
const coordinatesDiv = document.getElementById('coordinates');
|
||||
|
||||
// Исходные углы изображения 1000x1000
|
||||
const sourceCorners = [
|
||||
[-500, -500, 1], // Верхний левый
|
||||
[500, -500, 1], // Верхний правый
|
||||
[500, 500, 1], // Нижний правый
|
||||
[-500, 500, 1] // Нижний левый
|
||||
];
|
||||
|
||||
// Функция умножения матриц
|
||||
function multiplyMatrixVector(matrix, vector) {
|
||||
return [
|
||||
matrix[0][0] * vector[0] + matrix[0][1] * vector[1] + matrix[0][2] * vector[2],
|
||||
matrix[1][0] * vector[0] + matrix[1][1] * vector[1] + matrix[1][2] * vector[2],
|
||||
matrix[2][0] * vector[0] + matrix[2][1] * vector[1] + matrix[2][2] * vector[2]
|
||||
];
|
||||
}
|
||||
|
||||
// Создание матрицы вращения вокруг X (крен)
|
||||
function rotationX(angle) {
|
||||
const rad = angle * Math.PI / 180;
|
||||
const c = Math.cos(rad);
|
||||
const s = Math.sin(rad);
|
||||
return [
|
||||
[1, 0, 0],
|
||||
[0, c, -s],
|
||||
[0, s, c]
|
||||
];
|
||||
}
|
||||
|
||||
// Создание матрицы вращения вокруг Y (тангаж)
|
||||
function rotationY(angle) {
|
||||
const rad = angle * Math.PI / 180;
|
||||
const c = Math.cos(rad);
|
||||
const s = Math.sin(rad);
|
||||
return [
|
||||
[c, 0, s],
|
||||
[0, 1, 0],
|
||||
[-s, 0, c]
|
||||
];
|
||||
}
|
||||
|
||||
// Создание матрицы вращения вокруг Z (поворот)
|
||||
function rotationZ(angle) {
|
||||
const rad = angle * Math.PI / 180;
|
||||
const c = Math.cos(rad);
|
||||
const s = Math.sin(rad);
|
||||
return [
|
||||
[c, -s, 0],
|
||||
[s, c, 0],
|
||||
[0, 0, 1]
|
||||
];
|
||||
}
|
||||
|
||||
// Умножение двух матриц 3x3
|
||||
function multiplyMatrices(a, b) {
|
||||
const result = [[0,0,0], [0,0,0], [0,0,0]];
|
||||
for (let i = 0; i < 3; i++) {
|
||||
for (let j = 0; j < 3; j++) {
|
||||
for (let k = 0; k < 3; k++) {
|
||||
result[i][j] += a[i][k] * b[k][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function inv(a) {
|
||||
return math.inv(math.matrix(a))._data;
|
||||
}
|
||||
|
||||
// Проекция 3D точки на 2D плоскость
|
||||
function project3Dto2D(point, height) {
|
||||
const focalLength = 360;
|
||||
const distance = 1000 * height;
|
||||
|
||||
const z = point[2] + distance;
|
||||
// if (z <= 0) z = 0.01; // Избегаем деления на ноль
|
||||
|
||||
const x = 360 + (focalLength * point[0]) / z;
|
||||
const y = 360 + (focalLength * point[1]) / z;
|
||||
|
||||
return [x, y];
|
||||
}
|
||||
|
||||
// Главная функция отрисовки
|
||||
function render() {
|
||||
const yaw = parseFloat(yawSlider.value);
|
||||
const pitch = parseFloat(pitchSlider.value);
|
||||
const roll = parseFloat(rollSlider.value);
|
||||
const height = parseFloat(heightSlider.value);
|
||||
|
||||
// Обновление отображаемых значений
|
||||
yawValue.textContent = yaw;
|
||||
pitchValue.textContent = pitch.toFixed(1);
|
||||
rollValue.textContent = roll.toFixed(1);
|
||||
heightValue.textContent = height.toFixed(2);
|
||||
|
||||
// Создание комбинированной матрицы вращения
|
||||
const Rz = rotationZ(yaw);
|
||||
const Ry = rotationY(pitch);
|
||||
const Rx = rotationX(roll);
|
||||
|
||||
// R = Rz * Ry * Rx
|
||||
const R_temp = multiplyMatrices(Rz, Ry);
|
||||
const _R = multiplyMatrices(R_temp, Rx);
|
||||
// _R[0][2] = _R[1][2] = 0;
|
||||
// _R[2][2] = 1;
|
||||
const R = _R;
|
||||
console.log(_R, R)
|
||||
const K = [
|
||||
[375, 0, 375],
|
||||
[0, 375, 375],
|
||||
[0, 0, 1]
|
||||
];
|
||||
const T = [
|
||||
[0.5, 0, 375],
|
||||
[0, 0.5, 375],
|
||||
[0, 0, 1],
|
||||
]
|
||||
const H = multiplyMatrices(K, multiplyMatrices(R, inv(K)));
|
||||
const Z = multiplyMatrices(H, T)
|
||||
// Трансформация углов
|
||||
const transformedCorners = sourceCorners.map(corner => {
|
||||
const rotated = multiplyMatrixVector(Z, corner);
|
||||
const scaled = [
|
||||
rotated[0],
|
||||
rotated[1],
|
||||
rotated[2]
|
||||
];
|
||||
console.log(K, R, H, rotated);
|
||||
return [rotated[0] / rotated[2], rotated[1] / rotated[2]];
|
||||
// return project3Dto2D(scaled, height);
|
||||
});
|
||||
|
||||
// Очистка canvas
|
||||
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
||||
|
||||
// Рисование сетки для справки
|
||||
ctx.strokeStyle = '#e0e0e0';
|
||||
ctx.lineWidth = 1;
|
||||
for (let i = 0; i <= 720; i += 90) {
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(i, 0);
|
||||
ctx.lineTo(i, 720);
|
||||
ctx.stroke();
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(0, i);
|
||||
ctx.lineTo(720, i);
|
||||
ctx.stroke();
|
||||
}
|
||||
|
||||
// Рисование центральной точки
|
||||
ctx.fillStyle = '#ff0000';
|
||||
ctx.beginPath();
|
||||
ctx.arc(360, 360, 5, 0, Math.PI * 2);
|
||||
ctx.fill();
|
||||
|
||||
// Рисование четырёхугольника
|
||||
ctx.strokeStyle = '#0066cc';
|
||||
ctx.fillStyle = 'rgba(0, 102, 204, 0.2)';
|
||||
ctx.lineWidth = 3;
|
||||
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(transformedCorners[0][0], transformedCorners[0][1]);
|
||||
for (let i = 1; i < transformedCorners.length; i++) {
|
||||
ctx.lineTo(transformedCorners[i][0], transformedCorners[i][1]);
|
||||
}
|
||||
ctx.closePath();
|
||||
ctx.fill();
|
||||
ctx.stroke();
|
||||
|
||||
// Рисование углов
|
||||
ctx.fillStyle = '#ff6600';
|
||||
transformedCorners.forEach((corner, i) => {
|
||||
ctx.beginPath();
|
||||
ctx.arc(corner[0], corner[1], 6, 0, Math.PI * 2);
|
||||
ctx.fill();
|
||||
|
||||
// Подписи углов
|
||||
ctx.fillStyle = '#333';
|
||||
ctx.font = '12px Arial';
|
||||
ctx.fillText(`${i + 1}`, corner[0] + 10, corner[1] - 10);
|
||||
ctx.fillStyle = '#ff6600';
|
||||
});
|
||||
|
||||
// Вывод координат
|
||||
let coordText = '';
|
||||
transformedCorners.forEach((corner, i) => {
|
||||
coordText += `Угол ${i + 1}: (${corner[0].toFixed(1)}, ${corner[1].toFixed(1)})<br>`;
|
||||
});
|
||||
coordinatesDiv.innerHTML = coordText;
|
||||
}
|
||||
|
||||
// Обработчики событий
|
||||
yawSlider.addEventListener('input', render);
|
||||
pitchSlider.addEventListener('input', render);
|
||||
rollSlider.addEventListener('input', render);
|
||||
heightSlider.addEventListener('input', render);
|
||||
|
||||
// Начальная отрисовка
|
||||
render();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
249
examples/page2.html
Normal file
249
examples/page2.html
Normal file
@@ -0,0 +1,249 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="ru">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Интерактивная система координат с трансформацией</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
max-width: 800px;
|
||||
margin: 50px auto;
|
||||
padding: 20px;
|
||||
}
|
||||
canvas {
|
||||
border: 1px solid #ccc;
|
||||
display: block;
|
||||
margin: 20px auto;
|
||||
background: #f9f9f9;
|
||||
}
|
||||
.controls {
|
||||
display: grid;
|
||||
grid-template-columns: 150px 1fr 80px;
|
||||
gap: 15px;
|
||||
margin-bottom: 20px;
|
||||
align-items: center;
|
||||
}
|
||||
input[type="range"] {
|
||||
width: 100%;
|
||||
}
|
||||
label {
|
||||
font-weight: bold;
|
||||
}
|
||||
.value {
|
||||
text-align: center;
|
||||
font-family: monospace;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Система координат с матрицей трансформации 3×3</h1>
|
||||
|
||||
<div class="controls">
|
||||
<label for="scale">Масштаб:</label>
|
||||
<input type="range" id="scale" min="0.5" max="3" step="0.1" value="1">
|
||||
<span class="value" id="scaleValue">1.0</span>
|
||||
|
||||
<label for="projX">Проекция X (px):</label>
|
||||
<input type="range" id="projX" min="-1.0" max="1.0" step="0.01" value="0">
|
||||
<span class="value" id="projXValue">0.00</span>
|
||||
|
||||
<label for="projY">Проекция Y (py):</label>
|
||||
<input type="range" id="projY" min="-1.0" max="1.0" step="0.01" value="0">
|
||||
<span class="value" id="projYValue">0.00</span>
|
||||
|
||||
<label for="rotation">Поворот:</label>
|
||||
<input type="range" id="rotation" min="0" max="360" step="1" value="0">
|
||||
<span class="value" id="rotationValue">0°</span>
|
||||
</div>
|
||||
|
||||
<canvas id="canvas" width="600" height="600"></canvas>
|
||||
|
||||
<script>
|
||||
const canvas = document.getElementById('canvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
|
||||
// Исходные точки квадрата
|
||||
const originalPoints = [
|
||||
[-50, -50, 1],
|
||||
[-25, -50, 1],
|
||||
[0, -50, 1],
|
||||
[25, -50, 1],
|
||||
[50, -50, 1],
|
||||
[50, 50, 1],
|
||||
[-50, 50, 1]
|
||||
];
|
||||
|
||||
// Элементы управления
|
||||
const scaleInput = document.getElementById('scale');
|
||||
const projXInput = document.getElementById('projX');
|
||||
const projYInput = document.getElementById('projY');
|
||||
const rotationInput = document.getElementById('rotation');
|
||||
|
||||
// Отображение значений
|
||||
const scaleValue = document.getElementById('scaleValue');
|
||||
const projXValue = document.getElementById('projXValue');
|
||||
const projYValue = document.getElementById('projYValue');
|
||||
const rotationValue = document.getElementById('rotationValue');
|
||||
|
||||
// Умножение матриц 3x3
|
||||
function multiplyMatrices(a, b) {
|
||||
const result = [
|
||||
[0, 0, 0],
|
||||
[0, 0, 0],
|
||||
[0, 0, 0]
|
||||
];
|
||||
|
||||
for (let i = 0; i < 3; i++) {
|
||||
for (let j = 0; j < 3; j++) {
|
||||
for (let k = 0; k < 3; k++) {
|
||||
result[i][j] += a[i][k] * b[k][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Применение матрицы к точке
|
||||
function applyMatrix(matrix, point) {
|
||||
const x = matrix[0][0] * point[0] + matrix[0][1] * point[1] + matrix[0][2] * point[2];
|
||||
const y = matrix[1][0] * point[0] + matrix[1][1] * point[1] + matrix[1][2] * point[2];
|
||||
const w = matrix[2][0] * point[0] + matrix[2][1] * point[1] + matrix[2][2] * point[2];
|
||||
|
||||
return [x / w, y / w, 1];
|
||||
}
|
||||
|
||||
// Создание матрицы масштабирования
|
||||
function getScaleMatrix(s) {
|
||||
return [
|
||||
[s, 0, 0],
|
||||
[0, s, 0],
|
||||
[0, 0, 1]
|
||||
];
|
||||
}
|
||||
|
||||
// Создание матрицы поворота
|
||||
function getRotationMatrix(angle) {
|
||||
const rad = angle * Math.PI / 180;
|
||||
const cos = Math.cos(rad);
|
||||
const sin = Math.sin(rad);
|
||||
return [
|
||||
[cos, -sin, 0],
|
||||
[sin, cos, 0],
|
||||
[0, 0, 1]
|
||||
];
|
||||
}
|
||||
|
||||
// Создание матрицы проекции
|
||||
function getProjectionMatrix(px, py) {
|
||||
return [
|
||||
[1, 0, 0],
|
||||
[0, 1, 0],
|
||||
[px, py, 1]
|
||||
];
|
||||
}
|
||||
|
||||
// Рисование сетки координат
|
||||
function drawGrid() {
|
||||
ctx.strokeStyle = '#ddd';
|
||||
ctx.lineWidth = 1;
|
||||
|
||||
// Вертикальные линии
|
||||
for (let x = 0; x <= canvas.width; x += 50) {
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(x, 0);
|
||||
ctx.lineTo(x, canvas.height);
|
||||
ctx.stroke();
|
||||
}
|
||||
|
||||
// Горизонтальные линии
|
||||
for (let y = 0; y <= canvas.height; y += 50) {
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(0, y);
|
||||
ctx.lineTo(canvas.width, y);
|
||||
ctx.stroke();
|
||||
}
|
||||
|
||||
// Оси координат
|
||||
ctx.strokeStyle = '#999';
|
||||
ctx.lineWidth = 2;
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(canvas.width / 2, 0);
|
||||
ctx.lineTo(canvas.width / 2, canvas.height);
|
||||
ctx.moveTo(0, canvas.height / 2);
|
||||
ctx.lineTo(canvas.width, canvas.height / 2);
|
||||
ctx.stroke();
|
||||
}
|
||||
|
||||
// Рисование четырехугольника
|
||||
function drawQuadrilateral(points) {
|
||||
const centerX = canvas.width / 2;
|
||||
const centerY = canvas.height / 2;
|
||||
|
||||
// Рисуем четырехугольник
|
||||
ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
|
||||
ctx.strokeStyle = '#2c5aa0';
|
||||
ctx.lineWidth = 2;
|
||||
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(centerX + points[0][0], centerY - points[0][1]);
|
||||
for (let i = 1; i < points.length; i++) {
|
||||
ctx.lineTo(centerX + points[i][0], centerY - points[i][1]);
|
||||
}
|
||||
ctx.closePath();
|
||||
ctx.fill();
|
||||
ctx.stroke();
|
||||
|
||||
// Рисуем точки
|
||||
ctx.fillStyle = '#e53e3e';
|
||||
points.forEach(point => {
|
||||
ctx.beginPath();
|
||||
ctx.arc(centerX + point[0], centerY - point[1], 5, 0, Math.PI * 2);
|
||||
ctx.fill();
|
||||
});
|
||||
}
|
||||
|
||||
// Обновление сцены
|
||||
function update() {
|
||||
const scale = parseFloat(scaleInput.value);
|
||||
const projX = parseFloat(projXInput.value) / 50;
|
||||
const projY = parseFloat(projYInput.value) / 50;
|
||||
const rotation = parseFloat(rotationInput.value);
|
||||
|
||||
// Обновление отображаемых значений
|
||||
scaleValue.textContent = scale.toFixed(1);
|
||||
projXValue.textContent = projX.toFixed(2);
|
||||
projYValue.textContent = projY.toFixed(2);
|
||||
rotationValue.textContent = rotation + '°';
|
||||
|
||||
// Создание матриц трансформации
|
||||
const scaleMatrix = getScaleMatrix(scale);
|
||||
const rotationMatrix = getRotationMatrix(rotation);
|
||||
const projectionMatrix = getProjectionMatrix(projX, projY);
|
||||
|
||||
// Комбинированная матрица: проекция * поворот * масштаб
|
||||
let transformMatrix = multiplyMatrices(scaleMatrix, rotationMatrix);
|
||||
transformMatrix = multiplyMatrices(transformMatrix, projectionMatrix);
|
||||
|
||||
// Применение трансформации к точкам
|
||||
const transformedPoints = originalPoints.map(point =>
|
||||
applyMatrix(transformMatrix, point)
|
||||
);
|
||||
|
||||
// Отрисовка
|
||||
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
||||
drawGrid();
|
||||
drawQuadrilateral(transformedPoints);
|
||||
}
|
||||
|
||||
// Обработчики событий
|
||||
scaleInput.addEventListener('input', update);
|
||||
projXInput.addEventListener('input', update);
|
||||
projYInput.addEventListener('input', update);
|
||||
rotationInput.addEventListener('input', update);
|
||||
|
||||
// Начальная отрисовка
|
||||
update();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
167
examples/transform_app.py
Normal file
167
examples/transform_app.py
Normal file
@@ -0,0 +1,167 @@
|
||||
from pathlib import Path
|
||||
|
||||
from kivy.app import App
|
||||
from kivy.uix.boxlayout import BoxLayout
|
||||
from kivy.uix.slider import Slider
|
||||
from kivy.uix.label import Label
|
||||
from kivy.uix.image import Image as KivyImage
|
||||
from kivy.graphics.texture import Texture
|
||||
from kivy.core.window import Window
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
import io
|
||||
|
||||
class CameraTransformApp(App):
|
||||
def build(self):
|
||||
Window.size = (900, 700)
|
||||
|
||||
# Главный контейнер
|
||||
main_layout = BoxLayout(orientation='vertical', padding=10, spacing=10)
|
||||
|
||||
# Контейнер для изображения
|
||||
self.image_widget = KivyImage(size_hint=(1, 0.7))
|
||||
main_layout.add_widget(self.image_widget)
|
||||
|
||||
# Контейнер для ползунков
|
||||
controls_layout = BoxLayout(orientation='vertical', size_hint=(1, 0.3), spacing=5)
|
||||
|
||||
# Yaw (рыскание) - вращение вокруг вертикальной оси
|
||||
yaw_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
|
||||
yaw_layout.add_widget(Label(text='Рыскание (Yaw):', size_hint=(0.2, 1)))
|
||||
self.yaw_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
|
||||
self.yaw_slider.bind(value=self.on_slider_change)
|
||||
self.yaw_label = Label(text='0°', size_hint=(0.2, 1))
|
||||
yaw_layout.add_widget(self.yaw_slider)
|
||||
yaw_layout.add_widget(self.yaw_label)
|
||||
controls_layout.add_widget(yaw_layout)
|
||||
|
||||
# Pitch (тангаж) - наклон вперед/назад
|
||||
pitch_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
|
||||
pitch_layout.add_widget(Label(text='Тангаж (Pitch):', size_hint=(0.2, 1)))
|
||||
self.pitch_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
|
||||
self.pitch_slider.bind(value=self.on_slider_change)
|
||||
self.pitch_label = Label(text='0°', size_hint=(0.2, 1))
|
||||
pitch_layout.add_widget(self.pitch_slider)
|
||||
pitch_layout.add_widget(self.pitch_label)
|
||||
controls_layout.add_widget(pitch_layout)
|
||||
|
||||
# Roll (крен) - наклон влево/вправо
|
||||
roll_layout = BoxLayout(orientation='horizontal', spacing=10, size_hint=(1, None), height=40)
|
||||
roll_layout.add_widget(Label(text='Крен (Roll):', size_hint=(0.2, 1)))
|
||||
self.roll_slider = Slider(min=-45, max=45, value=0, size_hint=(0.6, 1))
|
||||
self.roll_slider.bind(value=self.on_slider_change)
|
||||
self.roll_label = Label(text='0°', size_hint=(0.2, 1))
|
||||
roll_layout.add_widget(self.roll_slider)
|
||||
roll_layout.add_widget(self.roll_label)
|
||||
controls_layout.add_widget(roll_layout)
|
||||
|
||||
main_layout.add_widget(controls_layout)
|
||||
|
||||
# Создаем тестовое изображение с сеткой
|
||||
self.original_image = self.create_test_image()
|
||||
self.update_image()
|
||||
|
||||
return main_layout
|
||||
|
||||
def create_test_image(self):
|
||||
"""Создает тестовое изображение с сеткой и текстом"""
|
||||
img = cv2.imread(Path('images') / 'photo_610.png')
|
||||
|
||||
return img
|
||||
|
||||
def get_rotation_matrix(self, yaw, pitch, roll):
|
||||
"""Вычисляет матрицу поворота 3D"""
|
||||
# Конвертируем в радианы
|
||||
yaw = np.radians(yaw)
|
||||
pitch = np.radians(pitch)
|
||||
roll = np.radians(roll)
|
||||
|
||||
# Матрица поворота вокруг Z (yaw)
|
||||
Rz = np.array([
|
||||
[np.cos(yaw), -np.sin(yaw), 0],
|
||||
[np.sin(yaw), np.cos(yaw), 0],
|
||||
[0, 0, 1]
|
||||
])
|
||||
|
||||
# Матрица поворота вокруг Y (pitch)
|
||||
Ry = np.array([
|
||||
[np.cos(pitch), 0, -np.sin(pitch)],
|
||||
[0, 1, 0],
|
||||
[np.sin(pitch), 0, np.cos(pitch)]
|
||||
])
|
||||
|
||||
# Матрица поворота вокруг X (roll)
|
||||
Rx = np.array([
|
||||
[1, 0, 0],
|
||||
[0, np.cos(roll), -np.sin(roll)],
|
||||
[0, np.sin(roll), np.cos(roll)]
|
||||
])
|
||||
|
||||
# Комбинируем повороты: R = Rz * Ry * Rx
|
||||
R = Rx @ Ry @ Rz
|
||||
return R
|
||||
|
||||
def apply_perspective_transform(self, img, yaw, pitch, roll):
|
||||
"""Применяет перспективное преобразование к изображению"""
|
||||
h, w = img.shape[:2]
|
||||
|
||||
# Параметры камеры
|
||||
focal_length = w
|
||||
distance = w
|
||||
|
||||
# Матрица камеры
|
||||
camera_matrix = np.array([
|
||||
[focal_length / 2, 0, w / 2],
|
||||
[0, focal_length / 2, h / 2],
|
||||
[0, 0, 1]
|
||||
])
|
||||
|
||||
# Получаем матрицу поворота
|
||||
R = self.get_rotation_matrix(yaw, pitch, roll)
|
||||
|
||||
T = np.array([
|
||||
[1, 0, 0.2],
|
||||
[0, 1, 0.1],
|
||||
[0, 0, 2],
|
||||
])
|
||||
|
||||
# Создаем матрицу трансформации
|
||||
H = camera_matrix @ R @ T @ np.linalg.inv(camera_matrix)
|
||||
|
||||
# Применяем преобразование
|
||||
try:
|
||||
result = cv2.warpPerspective(img, H, (w, h),
|
||||
borderMode=cv2.BORDER_CONSTANT,
|
||||
borderValue=(50, 50, 50))
|
||||
return result
|
||||
except:
|
||||
return img
|
||||
|
||||
def update_image(self):
|
||||
"""Обновляет изображение с текущими параметрами"""
|
||||
yaw = self.yaw_slider.value
|
||||
pitch = self.pitch_slider.value
|
||||
roll = self.roll_slider.value
|
||||
|
||||
# Применяем трансформацию
|
||||
transformed = self.apply_perspective_transform(
|
||||
self.original_image, yaw, pitch, roll
|
||||
)
|
||||
|
||||
# Конвертируем в текстуру Kivy
|
||||
buf = cv2.flip(transformed, 0).tobytes()
|
||||
texture = Texture.create(size=(transformed.shape[1], transformed.shape[0]),
|
||||
colorfmt='bgr')
|
||||
texture.blit_buffer(buf, colorfmt='bgr', bufferfmt='ubyte')
|
||||
self.image_widget.texture = texture
|
||||
|
||||
def on_slider_change(self, instance, value):
|
||||
"""Обработчик изменения ползунков"""
|
||||
self.yaw_label.text = f'{int(self.yaw_slider.value)}°'
|
||||
self.pitch_label.text = f'{int(self.pitch_slider.value)}°'
|
||||
self.roll_label.text = f'{int(self.roll_slider.value)}°'
|
||||
self.update_image()
|
||||
|
||||
if __name__ == '__main__':
|
||||
CameraTransformApp().run()
|
||||
@@ -1,66 +0,0 @@
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
class Geolocation:
|
||||
""" Класс геопозиции """
|
||||
x: float
|
||||
y: float
|
||||
z: float # Масштаб (Высота)
|
||||
angle: float # Угол направления движения
|
||||
|
||||
def __init__(self, x: float = 0, y: float = 0, z: float = 1, angle: float = 0):
|
||||
self.x = x
|
||||
self.y = y
|
||||
self.z = z
|
||||
self.angle = angle
|
||||
|
||||
def __str__(self) -> str:
|
||||
return (f"Geolocation(x={self.x:.2f}, y={self.y:.2f}, "
|
||||
f"z={self.z:.2f}, angle={self.angle:.1f}°)")
|
||||
|
||||
def __matmul__(self, mat: np.array) -> 'Geolocation':
|
||||
return self.copy().apply(mat)
|
||||
|
||||
def __imatmul__(self, mat: np.array) -> 'Geolocation':
|
||||
return self.apply(mat)
|
||||
|
||||
def apply(self, mat: np.ndarray) -> 'Geolocation':
|
||||
""" На основе матрицы трансформации вычисляет новую позицию и направление """
|
||||
|
||||
tx, ty = -mat[0, 2], -mat[1, 2]
|
||||
|
||||
# Вычисляем угол поворота
|
||||
rotation = -np.arctan2(mat[1, 0], mat[0, 0])
|
||||
scale = np.hypot(mat[0, 0], mat[0, 1])
|
||||
|
||||
# Координаты уже отцентрированы, поэтому используем их напрямую
|
||||
dx_meters = tx
|
||||
dy_meters = ty
|
||||
|
||||
angle_global = self.angle + rotation
|
||||
|
||||
# Применяем поворот к смещению (учитываем текущий угол БПЛА)
|
||||
cos_angle = math.cos(angle_global)
|
||||
sin_angle = math.sin(angle_global)
|
||||
|
||||
# Поворачиваем смещение в глобальные координаты
|
||||
# Обратите внимание: dy_meters инвертирован, так как в изображениях Y направлен вниз
|
||||
dx_global = dx_meters * cos_angle - dy_meters * sin_angle
|
||||
dy_global = dx_meters * sin_angle + dy_meters * cos_angle
|
||||
|
||||
# Обновляем координаты БПЛА
|
||||
self.z /= scale
|
||||
self.x = self.x + dx_global * self.z
|
||||
self.y = self.y + dy_global * self.z
|
||||
|
||||
# Нормализуем угол в диапазоне [-π, π]
|
||||
self.angle = math.atan2(math.sin(angle_global), math.cos(angle_global))
|
||||
|
||||
return self
|
||||
|
||||
def copy(self) -> 'Geolocation':
|
||||
return Geolocation(self.x, self.y, self.z, self.angle)
|
||||
|
||||
@staticmethod
|
||||
def transform(g: 'Geolocation', mat: np.ndarray) -> 'Geolocation | None':
|
||||
return g.copy().apply(mat)
|
||||
89
google_map.py
Normal file
89
google_map.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
from selenium.webdriver.common.actions.wheel_input import ScrollOrigin
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from time import sleep
|
||||
|
||||
import constants
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
import os
|
||||
import utility
|
||||
|
||||
def generateURL(lat: float, lon: float, zoom: int):
|
||||
return f"https://www.google.com/maps/@{lon},{lat},{zoom}z"
|
||||
|
||||
class GoogleMap:
|
||||
initial_zoom: int
|
||||
initial_lat: float
|
||||
initial_lon: float
|
||||
pixel_ratio: float
|
||||
|
||||
def __init__(self, initial_lat=49.103814, initial_lon=55.794258, initial_zoom=18):
|
||||
self.initial_lat = initial_lat
|
||||
self.initial_lon = initial_lon
|
||||
self.initial_zoom = initial_zoom
|
||||
self.pixel_ratio = constants.GOOGLE_PIXEL_RATIO[self.initial_zoom]
|
||||
|
||||
options = webdriver.ChromeOptions()
|
||||
# options.add_experimental_option("detach", True)
|
||||
self.driver = webdriver.Chrome(options)
|
||||
self.driver.get(generateURL(initial_lat, initial_lon, initial_zoom))
|
||||
self.driver.maximize_window()
|
||||
|
||||
action = ActionChains(self.driver)
|
||||
sleep(5)
|
||||
self.driver.execute_script('document.querySelector(\'.yHc72.qk5Wte\').click()')
|
||||
sleep(5)
|
||||
|
||||
def open(self, lat, lon, zoom):
|
||||
self.initial_lat = lat
|
||||
self.initial_lon = lon
|
||||
self.initial_zoom = zoom
|
||||
self.pixel_ratio = constants.GOOGLE_PIXEL_RATIO[self.initial_zoom]
|
||||
self.driver.get(generateURL(lat, lon, zoom))
|
||||
|
||||
def save_photo(self, filename: str):
|
||||
im = self.make_screenshot()
|
||||
im.save(filename)
|
||||
|
||||
def destroy(self):
|
||||
self.driver.close()
|
||||
|
||||
def get_size(self) -> tuple[int, int]:
|
||||
html = self.driver.find_element(By.TAG_NAME, 'html')
|
||||
return (html.size['width'], html.size['height'])
|
||||
|
||||
def scroll(self, x: float, y: float, count: int = 1, inner_zoom: bool = True):
|
||||
html = self.driver.find_element(By.TAG_NAME, 'html')
|
||||
|
||||
x_offset = (x - 0.5) * (html.size['width'] - 72) + 72
|
||||
y_offset = (y - 0.5) * html.size['height']
|
||||
action = ActionChains(self.driver)
|
||||
|
||||
for i in range(count-1):
|
||||
action.scroll_from_origin(ScrollOrigin(html, int(x_offset), int(y_offset)), 0, -100 if inner_zoom else 100)
|
||||
action.perform()
|
||||
if i != count - 1:
|
||||
sleep(0.25)
|
||||
|
||||
def move(self, dx: float, dy: float):
|
||||
self.driver.execute_script(utility.google_map_js_move_script(dx, dy))
|
||||
|
||||
def make_as_center(self, x: float, y: float):
|
||||
dx = (x - 0.5) * self.get_size()[0]
|
||||
dy = (0.5 - y) * self.get_size()[1]
|
||||
self.move(dx, dy)
|
||||
sleep(1)
|
||||
|
||||
def make_screenshot(self) -> Image.Image:
|
||||
png = self.driver.get_screenshot_as_png()
|
||||
im = Image.open(BytesIO(png))
|
||||
return utility.cv2_to_pil(np.array(im)[:, 72:])
|
||||
|
||||
def get_geolocation(self):
|
||||
current_url = self.driver.current_url
|
||||
return utility.parse_google_maps_url(current_url)
|
||||
388
main.py
388
main.py
@@ -1,23 +1,33 @@
|
||||
from google_map import GoogleMap
|
||||
from pathlib import Path
|
||||
from position import Position
|
||||
from simulator import Simulator
|
||||
from time import sleep
|
||||
from trajectory_drawer import TrajectoryDrawer
|
||||
from utility import cv2_to_pil
|
||||
from vision_chunk import VisionChunk
|
||||
from visualization import VisualizationManager
|
||||
from yandex_map import YandexMap
|
||||
from vision_chunk import VisionChunk
|
||||
from utility import cv2_to_pil
|
||||
import random
|
||||
|
||||
import argparse
|
||||
import autopilot
|
||||
import constants
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pickle
|
||||
import random
|
||||
import utility
|
||||
|
||||
import autopilot
|
||||
def get_map(map_name: str = 'google', lat=49.103814, lon=55.794258, zoom=18):
|
||||
if map_name == 'google': return GoogleMap(lat, lon, zoom)
|
||||
if map_name == 'yandex': return YandexMap(lat, lon, zoom)
|
||||
return None
|
||||
|
||||
def make_global_photo(filename):
|
||||
yandexMap = YandexMap()
|
||||
yandexMap.save_photo(filename)
|
||||
yandexMap.destroy()
|
||||
def make_global_photo(filename, map_name: str = 'google', lat=49.103814, lon=55.794258, zoom=13):
|
||||
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, zoom)
|
||||
online_map.save_photo(filename)
|
||||
online_map.destroy()
|
||||
|
||||
def get_trajectory_points(bg_img: str) -> list[(float, float)]:
|
||||
trajectoryDrawer = TrajectoryDrawer(bg_img)
|
||||
@@ -26,97 +36,132 @@ def get_trajectory_points(bg_img: str) -> list[(float, float)]:
|
||||
points = list(map(lambda p: [p[0] / trajectoryDrawer.img.shape[1], p[1] / trajectoryDrawer.img.shape[0]], trajectoryDrawer.points))
|
||||
return points
|
||||
|
||||
def main():
|
||||
# Скриншот местности
|
||||
# make_global_photo('map.jpg')
|
||||
def build(name: str, map_name: str, lat: float, lon: float):
|
||||
|
||||
# Получаем траекторию от пользователя
|
||||
# points = get_trajectory_points('map.jpg')
|
||||
# Создание папки с информацией о маршруте
|
||||
dir = Path('trajectories')
|
||||
if not dir.exists(): dir.mkdir()
|
||||
dir /= name
|
||||
assert not dir.exists()
|
||||
dir.mkdir()
|
||||
dir_chunks = dir / 'chunks'
|
||||
dir_chunks.mkdir()
|
||||
|
||||
# Trajectory #1
|
||||
# points = [[np.float64(0.5384504359393909), np.float64(0.4084520767967683)], [np.float64(0.4451750568707629), np.float64(0.38213330305374654)], [np.float64(0.49266070439660997), np.float64(0.2789637099811013)], [np.float64(0.36377108968359656), np.float64(0.3263375027185404)], [np.float64(0.3535955937852008), np.float64(0.4337180995900692)]]
|
||||
make_global_photo('map.jpg', map_name, lat, lon, 15)
|
||||
points = get_trajectory_points('map.jpg')
|
||||
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, 15)
|
||||
|
||||
# Trajectory #2
|
||||
# points = [[np.float64(0.29197731306713737), np.float64(0.3452870198135161)], [np.float64(0.33494051797147517), np.float64(0.2010601397017569)], [np.float64(0.39768940934491587), np.float64(0.25369768718780034)], [np.float64(0.4027771572941138), np.float64(0.4158213334448144)], [np.float64(0.2914120077394487), np.float64(0.5547844588079692)]]
|
||||
|
||||
# Trajectory #3
|
||||
# points = [[np.float64(0.2755834585641664), np.float64(0.45687862048392835)], [np.float64(0.295934450360958), np.float64(0.5021469113219258)], [np.float64(0.32872215936689997), np.float64(0.4810918923275084)], [np.float64(0.3649017003389739), np.float64(0.5295184360146684)], [np.float64(0.3999506306556705), np.float64(0.49477765467387963)]]
|
||||
|
||||
# Trajectory #4
|
||||
# points = [[np.float64(0.42143223310783934), np.float64(0.6663760594783815)], [np.float64(0.4253893704016599), np.float64(0.5537317078582484)], [np.float64(0.5124463908657128), np.float64(0.5621537154560153)], [np.float64(0.5124463908657128), np.float64(0.6684815613778233)], [np.float64(0.42143223310783934), np.float64(0.6663760594783815)]]
|
||||
|
||||
# Trajectory #5
|
||||
# points = [[np.float64(0.5983728006743884), np.float64(0.7348048712102382)], [np.float64(0.5966768846913225), np.float64(0.5453097002604814)], [np.float64(0.6345523416464622), np.float64(0.7190136069644251)], [np.float64(0.6402053949233488), np.float64(0.5495207040593649)], [np.float64(0.5983728006743884), np.float64(0.7348048712102382)]]
|
||||
|
||||
# Trajectory #6
|
||||
# points = [[np.float64(0.4406526142492536), np.float64(0.28106921188054296)], [np.float64(0.38581799746345413), np.float64(0.2968604761263561)], [np.float64(0.3931669667234066), np.float64(0.353709027411283)], [np.float64(0.4248240650739713), np.float64(0.35265627646156217)], [np.float64(0.40616898926024564), np.float64(0.3179154951207735)]]
|
||||
|
||||
# Trajectory #7
|
||||
# points = [[np.float64(0.5491912371654754), np.float64(0.7505961354560512)], [np.float64(0.5537136797869846), np.float64(0.6863783275230781)], [np.float64(0.5017055896396284), np.float64(0.6653233085286606)], [np.float64(0.5520177638039186), np.float64(0.6042637534448503)], [np.float64(0.5593667330638712), np.float64(0.516885424618018)]]
|
||||
|
||||
# Trajectory #8
|
||||
# points =
|
||||
|
||||
# Trajectory #9
|
||||
# points =
|
||||
|
||||
# Trajectory #10
|
||||
# points =
|
||||
|
||||
print(points)
|
||||
|
||||
# Для каждой точки сделаем приближенный снимок
|
||||
yandexMap = YandexMap()
|
||||
chunks: list[VisionChunk] = []
|
||||
|
||||
plt.ion()
|
||||
for i in range(len(points)):
|
||||
point = points[i]
|
||||
yandexMap.scroll(point[0], point[1], 5, True)
|
||||
sleep(1)
|
||||
cv2_img = yandexMap.make_screenshot(point[0], point[1], 0.2, 0.2)
|
||||
img = cv2_to_pil(cv2_img)
|
||||
chunk = VisionChunk(img)
|
||||
Path('chunks').mkdir(exist_ok=True)
|
||||
chunk.save_image(Path('.') / 'chunks' / f'chunk_{i}.png')
|
||||
plt.subplot(1, len(points), i+1)
|
||||
plt.imshow(img)
|
||||
plt.pause(0.25)
|
||||
yandexMap.scroll(point[0], point[1], 5, False)
|
||||
|
||||
plt.tight_layout()
|
||||
|
||||
# Выделим на каждой картинке ключевые точки
|
||||
for i in range(len(points)):
|
||||
chunk = VisionChunk.load_image(Path('chunks') / f'chunk_{i}.png')
|
||||
chunks.append(chunk)
|
||||
kp, des = chunk.compute_keypoints()
|
||||
|
||||
plt.subplot(1, len(points), i+1)
|
||||
plt.imshow(chunk.image)
|
||||
kp_coords = np.array([j.pt for j in kp])
|
||||
if len(kp_coords) > 0:
|
||||
plt.scatter(kp_coords[:, 0], kp_coords[:, 1], c='red', s=20, alpha=0.7, marker='o')
|
||||
plt.pause(0.2)
|
||||
plt.ioff()
|
||||
|
||||
plt.show(block=True)
|
||||
|
||||
# Начнём симуляцию полёта с первой точки
|
||||
yandexMap.scroll(points[0][0], points[0][1], 5, True)
|
||||
sleep(0.2)
|
||||
yandexMap.make_as_center(*points[0])
|
||||
sleep(1)
|
||||
|
||||
vis_manager = VisualizationManager()
|
||||
width, height = yandexMap.get_size()
|
||||
# print(width, height)
|
||||
width, height = online_map.get_size()
|
||||
points_coords = np.array(list(map(lambda p: [
|
||||
(p[0] - points[0][0]) * width, (points[0][1] - p[1]) * height
|
||||
], points)))
|
||||
points_coords *= 2 ** 4
|
||||
pilot = autopilot.AutoPilot(points_coords, chunks, vis_manager)
|
||||
simulator = Simulator(yandexMap)
|
||||
|
||||
points_coords *= online_map.pixel_ratio
|
||||
|
||||
# Начнём симуляцию полёта с первой точки
|
||||
online_map.make_as_center(*points[0])
|
||||
sleep(3)
|
||||
online_map.scroll(0.5, 0.5, 10, True)
|
||||
sleep(2)
|
||||
geo = online_map.get_geolocation()
|
||||
online_map.open(geo['lat'], geo['lon'], 18)
|
||||
sleep(5)
|
||||
|
||||
points_coords_pixel = points_coords.copy() / online_map.pixel_ratio
|
||||
|
||||
pilot = autopilot.AutoPilot(points_coords_pixel, pixel_ratio=online_map.pixel_ratio)
|
||||
simulator = Simulator(online_map)
|
||||
pilot.target_idx = 0
|
||||
|
||||
pilot.pos = simulator.pos.copy()
|
||||
command = pilot.make_command()
|
||||
|
||||
positions: list[Position] = []
|
||||
|
||||
print("points_coords_pixel:", points_coords_pixel)
|
||||
for i in range(5000):
|
||||
|
||||
if command.stop:
|
||||
break
|
||||
|
||||
chunk = simulator.get_chunk()
|
||||
pilot.pos = simulator.pos.copy()
|
||||
command = pilot.make_command()
|
||||
command.velocity /= online_map.pixel_ratio
|
||||
|
||||
print("Position:", simulator.pos)
|
||||
|
||||
# Save Image
|
||||
chunk.save_image(dir_chunks / f"chunk_{len(positions)}.png")
|
||||
|
||||
positions.append(simulator.pos.copy() * online_map.pixel_ratio)
|
||||
|
||||
simulator.handle(command.dangle, command.velocity)
|
||||
if i == 0 and map_name == 'google':
|
||||
simulator.pos.x = 0
|
||||
simulator.pos.y = 0
|
||||
sleep(1.5)
|
||||
|
||||
data = {
|
||||
'points': points_coords,
|
||||
'chunk_positions': positions,
|
||||
'initial_geolocation': geo
|
||||
}
|
||||
|
||||
print(points_coords)
|
||||
|
||||
file_positions = dir / 'positions.pkl'
|
||||
with file_positions.open('wb') as file:
|
||||
pickle.dump(data, file)
|
||||
|
||||
print("WRITE POINTS:", points)
|
||||
|
||||
sleep(15)
|
||||
online_map.destroy()
|
||||
|
||||
def run(name: str, map_name: str, ref_min_distance: float):
|
||||
dir = Path('trajectories')
|
||||
assert dir.exists()
|
||||
dir /= name
|
||||
assert dir.exists(), "Укажите корректное название маршрута"
|
||||
dir_chunks = dir / 'chunks'
|
||||
file_positions = dir / 'positions.pkl'
|
||||
|
||||
with file_positions.open('rb') as file:
|
||||
data = pickle.load(file)
|
||||
|
||||
initial_geolocation = data['initial_geolocation']
|
||||
|
||||
lat = initial_geolocation['lat']
|
||||
lon = initial_geolocation['lon']
|
||||
online_map: YandexMap | GoogleMap = get_map(map_name, lat, lon, 18)
|
||||
sleep(2)
|
||||
|
||||
chunks: list[VisionChunk] = []
|
||||
for i in range(len(data['chunk_positions'])):
|
||||
pos = data['chunk_positions'][i]
|
||||
if len(chunks) == 0 or np.hypot(chunks[-1].pos.x - pos.x, chunks[-1].pos.y - pos.y) > ref_min_distance:
|
||||
chunk = VisionChunk.load_image(dir_chunks / f"chunk_{i}.png")
|
||||
chunk.pos = data['chunk_positions'][i] / online_map.pixel_ratio
|
||||
chunks.append(chunk)
|
||||
|
||||
r = 0
|
||||
for i in range(len(data['points']) - 1):
|
||||
r += np.hypot(
|
||||
data['points'][i][0] - data['points'][i+1][0],
|
||||
data['points'][i][1] - data['points'][i+1][1]
|
||||
)
|
||||
print("R: ", r)
|
||||
|
||||
return
|
||||
|
||||
points = data['points'] / online_map.pixel_ratio
|
||||
print("READ POINTS:", points)
|
||||
|
||||
vis_manager = VisualizationManager()
|
||||
pilot = autopilot.AutoPilot(points, chunks, vis_manager, online_map.pixel_ratio)
|
||||
simulator = Simulator(online_map)
|
||||
pilot.target_idx = 0
|
||||
|
||||
chunk = simulator.get_chunk()
|
||||
@@ -125,28 +170,26 @@ def main():
|
||||
vis_manager.update_display()
|
||||
vis_manager.pause(1)
|
||||
|
||||
vis_manager.set_target_points(points_coords)
|
||||
vis_manager.set_target_points(data['points'])
|
||||
|
||||
proc_time = np.array([])
|
||||
|
||||
zoom_next_event = random.randint(5, 10)
|
||||
|
||||
errors = []
|
||||
chunk_errors = []
|
||||
chunk_improves = []
|
||||
|
||||
last_chunk_index = 0
|
||||
|
||||
sleep(1)
|
||||
|
||||
for i in range(10000000000):
|
||||
print(f"Image #{i}")
|
||||
if i == zoom_next_event:
|
||||
r = random.randint(0, 1)
|
||||
direction = ['up', 'down'][r]
|
||||
simulator.change_zoom(direction)
|
||||
zoom_next_event = i + random.randint(20, 40)
|
||||
if i > 0:
|
||||
simulator.set_pitch(np.sin(i / 10) * 5)
|
||||
simulator.set_roll(np.sin(i / 15) * 5)
|
||||
simulator.set_zoom(1.0 + np.sin(i / 10) * 0.3)
|
||||
|
||||
if command.stop:
|
||||
break
|
||||
|
||||
chunk = simulator.get_chunk()
|
||||
command = pilot.handle(chunk)
|
||||
command.velocity /= online_map.pixel_ratio
|
||||
|
||||
proc_time = np.append(proc_time, command.proccessing_time)
|
||||
|
||||
@@ -157,36 +200,133 @@ def main():
|
||||
vis_manager.pause(0.2)
|
||||
|
||||
vis_manager.set_target_index(pilot.target_idx)
|
||||
vis_manager.update_drone_trajectory(pilot.geo.x, pilot.geo.y)
|
||||
vis_manager.update_global_map(simulator.geo.x, simulator.geo.y)
|
||||
vis_manager.update_error_plot(i, pilot.geo.x, pilot.geo.y, simulator.geo.x, simulator.geo.y)
|
||||
vis_manager.update_drone_trajectory(pilot.pos.x * online_map.pixel_ratio, pilot.pos.y * online_map.pixel_ratio)
|
||||
vis_manager.update_global_map(simulator.pos.x * online_map.pixel_ratio, simulator.pos.y * online_map.pixel_ratio)
|
||||
vis_manager.update_error_plot(i, pilot.pos.x * online_map.pixel_ratio, pilot.pos.y * online_map.pixel_ratio, simulator.pos.x * online_map.pixel_ratio, simulator.pos.y * online_map.pixel_ratio)
|
||||
|
||||
errors.append(np.hypot(pilot.geo.x - simulator.geo.x, pilot.geo.y - simulator.geo.y))
|
||||
if last_chunk_index != pilot.target_idx:
|
||||
last_chunk_index = pilot.target_idx
|
||||
chunk_errors.append(errors[-1])
|
||||
chunk_improves.append(errors[-1] - errors[max(len(errors) - 2, 0)])
|
||||
|
||||
if command.stop:
|
||||
break
|
||||
|
||||
simulator.handle(command.dangle, command.velocity)
|
||||
errors.append(np.hypot((pilot.pos.x - simulator.pos.x) * online_map.pixel_ratio, (pilot.pos.y - simulator.pos.y) * online_map.pixel_ratio))
|
||||
|
||||
vis_manager.update_display()
|
||||
vis_manager.pause(0.2)
|
||||
|
||||
last_proc_times = proc_time[-10:]
|
||||
last_proc_times = proc_time[-30:]
|
||||
print(F"\nImage #{i}")
|
||||
print("Average FPS:", 1 / last_proc_times.mean())
|
||||
print("Pilot coords:", pilot.pos)
|
||||
print("Simulator coords:", simulator.pos)
|
||||
sleep(0.5)
|
||||
simulator.handle(command.dangle, command.velocity)
|
||||
if i == 0 and map_name == 'google':
|
||||
simulator.pos.x = 0
|
||||
simulator.pos.y = 0
|
||||
|
||||
print("Errors:", errors)
|
||||
print("MSE:", (np.array(errors) ** 2).mean())
|
||||
print("RMSE:", (np.array(errors) ** 2).mean() ** 0.5)
|
||||
print("Chunk errors:", chunk_errors)
|
||||
print("Chunk error improves:", chunk_improves)
|
||||
print("Average FPS:", 1 / proc_time.mean())
|
||||
vis_manager.show_final()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
def parse_args():
|
||||
"""Парсер аргументов командной строки"""
|
||||
parser = argparse.ArgumentParser(description='Обработка траекторий')
|
||||
|
||||
#TODO
|
||||
# Добавляем обязательный аргумент --mode
|
||||
parser.add_argument(
|
||||
'--mode',
|
||||
type=str,
|
||||
required=True,
|
||||
choices=['standalone', 'build', 'run'],
|
||||
help='Режим работы: standalone, build или run'
|
||||
)
|
||||
|
||||
# Добавляем опциональный аргумент --name
|
||||
parser.add_argument(
|
||||
'--name',
|
||||
type=str,
|
||||
default=utility.generate_folder_name(),
|
||||
help='Название траектории'
|
||||
)
|
||||
|
||||
# Координаты
|
||||
parser.add_argument(
|
||||
'--lat',
|
||||
type=float,
|
||||
default=49.103814,
|
||||
help='Широта (по умолчанию 49.103814)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--lon',
|
||||
type=float,
|
||||
default=55.794258,
|
||||
help='Долгота (по умолчанию 55.794258)'
|
||||
)
|
||||
|
||||
# Источник эталонных изображений (ориентиров)
|
||||
parser.add_argument(
|
||||
'--reference',
|
||||
type=str,
|
||||
default='google',
|
||||
choices=['google', 'yandex'],
|
||||
help='Откуда берутся эталонные изображения (ориентиры): google или yandex (по умолчанию google)'
|
||||
)
|
||||
|
||||
# Место проведения симуляции
|
||||
parser.add_argument(
|
||||
'--simulation',
|
||||
type=str,
|
||||
default='yandex',
|
||||
choices=['google', 'yandex'],
|
||||
help='Где проводится симуляция: google или yandex (по умолчанию yandex)'
|
||||
)
|
||||
|
||||
# Место проведения симуляции
|
||||
parser.add_argument(
|
||||
'--ref-min-distance',
|
||||
type=float,
|
||||
default=100,
|
||||
help='Минимальное расстояние между эталонами'
|
||||
)
|
||||
|
||||
# Место проведения симуляции
|
||||
parser.add_argument(
|
||||
'--debug-fps',
|
||||
action='store_true',
|
||||
help='Включить отладку FPS'
|
||||
)
|
||||
|
||||
# Место проведения симуляции
|
||||
parser.add_argument(
|
||||
'--debug-landmark',
|
||||
action='store_true',
|
||||
help='Включить отладку эталонов'
|
||||
)
|
||||
|
||||
# Парсим аргументы
|
||||
args = parser.parse_args()
|
||||
|
||||
# Проверяем, что для build и run указан --name
|
||||
if args.mode in ['build', 'run'] and not args.name:
|
||||
parser.error(f"--name обязателен для режима {args.mode}")
|
||||
|
||||
return args
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
name = args.name
|
||||
mode = args.mode
|
||||
sim: str = args.simulation
|
||||
ref: str = args.reference
|
||||
lat: float = args.lat
|
||||
lon: float = args.lon
|
||||
rmd: float = args.ref_min_distance
|
||||
|
||||
constants.DEBUG_FPS = args.debug_fps
|
||||
constants.DEBUG_LANDMARK = args.debug_landmark
|
||||
|
||||
if mode == 'build' or mode == 'standalone':
|
||||
build(name, ref, lat, lon)
|
||||
|
||||
if mode == 'run' or mode == 'standalone':
|
||||
run(name, sim, rmd)
|
||||
|
||||
BIN
map.jpg
BIN
map.jpg
Binary file not shown.
|
Before Width: | Height: | Size: 3.4 MiB After Width: | Height: | Size: 3.3 MiB |
0
models/SiaN/.gitignore
vendored
Normal file
0
models/SiaN/.gitignore
vendored
Normal file
295
models/SiaN/README_homography.md
Normal file
295
models/SiaN/README_homography.md
Normal file
@@ -0,0 +1,295 @@
|
||||
# Homography Estimation System
|
||||
|
||||
This system provides a complete pipeline for estimating homography matrices between Google and Yandex map images using deep learning.
|
||||
|
||||
## Overview
|
||||
|
||||
Homography estimation is crucial for aligning images from different sources (Google Maps and Yandex Maps in this case). The system includes:
|
||||
|
||||
1. **Dataset handling** - Loading and preprocessing image pairs
|
||||
2. **Data augmentation** - Homography-based augmentation for robust training
|
||||
3. **CNN model** - Deep learning model for homography estimation
|
||||
4. **Training pipeline** - Complete training and evaluation workflow
|
||||
5. **Inference tools** - Tools for using trained models on new data
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.8+
|
||||
- PyTorch 1.9+
|
||||
- OpenCV
|
||||
- PIL/Pillow
|
||||
- NumPy
|
||||
|
||||
### Install dependencies
|
||||
```bash
|
||||
pip install torch torchvision opencv-python pillow numpy matplotlib tqdm tensorboard
|
||||
```
|
||||
|
||||
## Dataset Structure
|
||||
|
||||
The system expects image pairs in the following format:
|
||||
```
|
||||
dataset/
|
||||
├── 0000_google.png
|
||||
├── 0000_yandex.png
|
||||
├── 0001_google.png
|
||||
├── 0001_yandex.png
|
||||
└── ...
|
||||
```
|
||||
|
||||
Each pair consists of:
|
||||
- `{idx:04d}_google.png` - Google map image
|
||||
- `{idx:04d}_yandex.png` - Yandex map image
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Explore the dataset
|
||||
```python
|
||||
from models.homography import HomographyDataset
|
||||
|
||||
dataset = HomographyDataset(
|
||||
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
augment=True,
|
||||
image_size=(256, 256)
|
||||
)
|
||||
|
||||
print(f"Found {len(dataset)} image pairs")
|
||||
sample = dataset[0]
|
||||
print(f"Sample homography matrix:\n{sample['homography'].numpy()}")
|
||||
```
|
||||
|
||||
### 2. Train a model
|
||||
```bash
|
||||
python models/train_homography.py \
|
||||
--data_dir "C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images" \
|
||||
--epochs 50 \
|
||||
--batch_size 16 \
|
||||
--lr 1e-3 \
|
||||
--output_dir "runs/my_experiment"
|
||||
```
|
||||
|
||||
### 3. Perform inference
|
||||
```bash
|
||||
python models/infer_homography.py \
|
||||
--model_path "runs/my_experiment/checkpoint_best.pth" \
|
||||
--mode single \
|
||||
--google_path "path/to/google.png" \
|
||||
--yandex_path "path/to/yandex.png" \
|
||||
--output_vis "alignment_result.png"
|
||||
```
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
models/
|
||||
├── homography.py # Dataset class and data loaders
|
||||
├── homography_cnn.py # CNN model architecture
|
||||
├── train_homography.py # Training script
|
||||
├── infer_homography.py # Inference script
|
||||
├── example_homography.py # Example usage
|
||||
├── homography.ipynb # Jupyter notebook (empty)
|
||||
└── README_homography.md # This file
|
||||
```
|
||||
|
||||
## Model Architecture
|
||||
|
||||
The homography estimation model (`HomographyCNN`) consists of:
|
||||
|
||||
1. **Dual encoders** - Separate feature extraction for Google and Yandex images
|
||||
2. **Residual blocks** - For deep feature learning
|
||||
3. **Fusion layers** - Combine features from both images
|
||||
4. **Regression head** - Predict 3x3 homography matrix
|
||||
|
||||
### Key Features:
|
||||
- Residual connections for stable training
|
||||
- Batch normalization options
|
||||
- Dropout for regularization
|
||||
- Geometric consistency loss
|
||||
|
||||
## Training Configuration
|
||||
|
||||
Default training parameters:
|
||||
- **Optimizer**: Adam with learning rate 1e-3
|
||||
- **Loss function**: Combined matrix + geometric + regularization loss
|
||||
- **Batch size**: 32
|
||||
- **Image size**: 256x256
|
||||
- **Train/val split**: 80/20
|
||||
|
||||
## Inference Modes
|
||||
|
||||
The inference script supports three modes:
|
||||
|
||||
### 1. Single image pair
|
||||
```bash
|
||||
python infer_homography.py --mode single \
|
||||
--google_path google.png --yandex_path yandex.png
|
||||
```
|
||||
|
||||
### 2. Dataset evaluation
|
||||
```bash
|
||||
python infer_homography.py --mode dataset \
|
||||
--dataset_dir path/to/dataset --num_samples 100
|
||||
```
|
||||
|
||||
### 3. Batch processing
|
||||
```bash
|
||||
python infer_homography.py --mode batch \
|
||||
--input_dir path/to/input --output_dir path/to/output
|
||||
```
|
||||
|
||||
## Evaluation Metrics
|
||||
|
||||
The system computes several metrics:
|
||||
- **Matrix MSE**: Mean squared error of homography matrix elements
|
||||
- **Corner error**: Average pixel error at image corners
|
||||
- **Geometric consistency**: Warping error across grid points
|
||||
|
||||
## Data Augmentation
|
||||
|
||||
The dataset applies homography-based augmentation:
|
||||
- Random rotation (-30° to 30°)
|
||||
- Random scaling (0.8x to 1.2x)
|
||||
- Random translation (-50 to 50 pixels)
|
||||
- Small perspective distortion
|
||||
|
||||
## Integration with Autopilot System
|
||||
|
||||
The homography estimation can be integrated into the autopilot system:
|
||||
|
||||
```python
|
||||
from models.infer_homography import HomographyInference
|
||||
|
||||
# Initialize inference
|
||||
inference = HomographyInference(model_path="path/to/model.pth")
|
||||
|
||||
# During flight loop
|
||||
homography = inference.predict(google_img, yandex_img)
|
||||
# Use homography to update drone position
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Out of memory**
|
||||
- Reduce batch size
|
||||
- Use smaller image size
|
||||
- Enable gradient checkpointing
|
||||
|
||||
2. **Poor convergence**
|
||||
- Adjust learning rate
|
||||
- Increase model capacity
|
||||
- Add more data augmentation
|
||||
|
||||
3. **Inference errors**
|
||||
- Check image formats (must be RGB)
|
||||
- Verify model was trained with same image size
|
||||
- Ensure proper normalization
|
||||
|
||||
### Debugging Tips
|
||||
|
||||
- Use `example_homography.py` to test components
|
||||
- Enable TensorBoard for training visualization
|
||||
- Check homography matrix normalization (last element should be ~1)
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Custom Model Architecture
|
||||
```python
|
||||
from models.homography_cnn import create_homography_model
|
||||
|
||||
model = create_homography_model(
|
||||
model_type="cnn",
|
||||
input_size=(512, 512),
|
||||
hidden_channels=128,
|
||||
num_blocks=6,
|
||||
dropout_rate=0.5
|
||||
)
|
||||
```
|
||||
|
||||
### Custom Loss Function
|
||||
```python
|
||||
from models.homography_cnn import HomographyLoss
|
||||
|
||||
loss_fn = HomographyLoss(
|
||||
matrix_weight=0.7,
|
||||
geometric_weight=0.3,
|
||||
reg_weight=0.05,
|
||||
grid_size=16
|
||||
)
|
||||
```
|
||||
|
||||
### Transfer Learning
|
||||
```python
|
||||
# Load pretrained model
|
||||
checkpoint = torch.load("pretrained.pth")
|
||||
model.load_state_dict(checkpoint["model_state_dict"])
|
||||
|
||||
# Fine-tune on new data
|
||||
for param in model.google_encoder.parameters():
|
||||
param.requires_grad = False # Freeze encoder
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Training speed**
|
||||
- Use multiple GPU workers for data loading
|
||||
- Enable mixed precision training
|
||||
- Use gradient accumulation for larger effective batch sizes
|
||||
|
||||
2. **Memory efficiency**
|
||||
- Use gradient checkpointing
|
||||
- Implement progressive resizing
|
||||
- Use memory-efficient optimizers
|
||||
|
||||
3. **Inference speed**
|
||||
- Use TensorRT or ONNX for deployment
|
||||
- Implement model quantization
|
||||
- Use batch inference when possible
|
||||
|
||||
## Future Improvements
|
||||
|
||||
1. **Model enhancements**
|
||||
- Transformer-based architecture
|
||||
- Multi-scale feature fusion
|
||||
- Uncertainty estimation
|
||||
|
||||
2. **Training improvements**
|
||||
- Self-supervised pre-training
|
||||
- Curriculum learning
|
||||
- Adversarial training
|
||||
|
||||
3. **Deployment features**
|
||||
- Real-time inference optimization
|
||||
- Mobile deployment
|
||||
- Web API
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this system in your research, please cite:
|
||||
|
||||
```bibtex
|
||||
@software{homography_estimation_2024,
|
||||
title = {Homography Estimation for Map Alignment},
|
||||
author = {Autopilot Team},
|
||||
year = {2024},
|
||||
url = {https://github.com/your-repo/homography-estimation}
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the LICENSE file for details.
|
||||
|
||||
## Support
|
||||
|
||||
For questions and issues:
|
||||
1. Check the troubleshooting section
|
||||
2. Review the example scripts
|
||||
3. Open an issue on GitHub
|
||||
4. Contact the development team
|
||||
|
||||
---
|
||||
|
||||
*Last updated: October 2024*
|
||||
345
models/SiaN/example_homography.py
Normal file
345
models/SiaN/example_homography.py
Normal file
@@ -0,0 +1,345 @@
|
||||
"""
|
||||
Example script demonstrating the complete homography estimation workflow.
|
||||
|
||||
This script shows how to:
|
||||
1. Load the ya_go_maps dataset
|
||||
2. Create and train a homography estimation model
|
||||
3. Perform inference on new image pairs
|
||||
4. Visualize results
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
from models.homography import HomographyDataset, create_data_loaders
|
||||
from models.homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
|
||||
from models.infer_homography import HomographyInference
|
||||
from models.train_homography import HomographyTrainer
|
||||
|
||||
|
||||
def example_dataset_loading():
|
||||
"""Example 1: Loading and exploring the dataset."""
|
||||
print("=" * 60)
|
||||
print("Example 1: Loading and exploring the dataset")
|
||||
print("=" * 60)
|
||||
|
||||
# Path to the dataset
|
||||
dataset_path = r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images"
|
||||
|
||||
# Create dataset
|
||||
dataset = HomographyDataset(
|
||||
root_dir=dataset_path,
|
||||
augment=True,
|
||||
image_size=(256, 256),
|
||||
cache_homographies=True,
|
||||
)
|
||||
|
||||
print(f"Dataset size: {len(dataset)} image pairs")
|
||||
|
||||
# Get a sample
|
||||
sample = dataset[0]
|
||||
print(f"\nSample keys: {list(sample.keys())}")
|
||||
print(f"Google image shape: {sample['google_img'].shape}")
|
||||
print(f"Yandex image shape: {sample['yandex_img'].shape}")
|
||||
print(f"Homography shape: {sample['homography'].shape}")
|
||||
|
||||
# Show sample homography matrix
|
||||
print(f"\nSample homography matrix:")
|
||||
print(sample["homography"].numpy())
|
||||
|
||||
# Get sample without augmentation for visualization
|
||||
raw_sample = dataset.get_sample_without_augmentation(0)
|
||||
|
||||
# Visualize sample
|
||||
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
|
||||
|
||||
axes[0].imshow(raw_sample["google_img"])
|
||||
axes[0].set_title("Google Map")
|
||||
axes[0].axis("off")
|
||||
|
||||
axes[1].imshow(raw_sample["yandex_img"])
|
||||
axes[1].set_title("Yandex Map")
|
||||
axes[1].axis("off")
|
||||
|
||||
plt.suptitle("Sample Image Pair (without augmentation)")
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
def example_model_creation():
|
||||
"""Example 2: Creating and testing the model."""
|
||||
print("\n" + "=" * 60)
|
||||
print("Example 2: Creating and testing the model")
|
||||
print("=" * 60)
|
||||
|
||||
# Set device
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
print(f"Using device: {device}")
|
||||
|
||||
# Create model
|
||||
model = HomographyCNN(
|
||||
input_channels=3,
|
||||
hidden_channels=64,
|
||||
num_blocks=4,
|
||||
dropout_rate=0.3,
|
||||
use_batch_norm=True,
|
||||
).to(device)
|
||||
|
||||
print(
|
||||
f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters"
|
||||
)
|
||||
|
||||
# Create dummy input
|
||||
batch_size = 2
|
||||
height, width = 256, 256
|
||||
|
||||
google_img = torch.randn(batch_size, 3, height, width).to(device)
|
||||
yandex_img = torch.randn(batch_size, 3, height, width).to(device)
|
||||
|
||||
# Test forward pass
|
||||
print("\nTesting forward pass...")
|
||||
output = model(google_img, yandex_img, return_matrix=True)
|
||||
print(f"Output shape: {output.shape}") # Should be (2, 3, 3)
|
||||
print(f"Sample output matrix:")
|
||||
print(output[0].cpu().detach().numpy())
|
||||
|
||||
# Test loss function
|
||||
print("\nTesting loss function...")
|
||||
target_homography = torch.eye(3).unsqueeze(0).expand(batch_size, -1, -1).to(device)
|
||||
loss_fn = HomographyLoss(
|
||||
matrix_weight=1.0,
|
||||
geometric_weight=0.5,
|
||||
reg_weight=0.1,
|
||||
grid_size=8,
|
||||
).to(device)
|
||||
|
||||
loss = loss_fn(output, target_homography, google_img, yandex_img)
|
||||
print(f"Loss value: {loss.item():.6f}")
|
||||
|
||||
# Test metrics
|
||||
print("\nTesting metrics...")
|
||||
metrics = loss_fn.compute_metrics(output, target_homography)
|
||||
for key, value in metrics.items():
|
||||
print(f"{key}: {value:.6f}")
|
||||
|
||||
return model, loss_fn
|
||||
|
||||
|
||||
def example_data_loaders():
|
||||
"""Example 3: Creating data loaders for training."""
|
||||
print("\n" + "=" * 60)
|
||||
print("Example 3: Creating data loaders for training")
|
||||
print("=" * 60)
|
||||
|
||||
# Path to the dataset
|
||||
dataset_path = r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images"
|
||||
|
||||
# Create data loaders
|
||||
train_loader, val_loader = create_data_loaders(
|
||||
root_dir=dataset_path,
|
||||
batch_size=16,
|
||||
train_split=0.8,
|
||||
num_workers=0, # Use 0 for debugging, increase for training
|
||||
image_size=(256, 256),
|
||||
augment_train=True,
|
||||
augment_val=False,
|
||||
)
|
||||
|
||||
print(f"Train batches: {len(train_loader)}")
|
||||
print(f"Val batches: {len(val_loader)}")
|
||||
|
||||
# Get a batch from train loader
|
||||
batch = next(iter(train_loader))
|
||||
print(f"\nBatch keys: {list(batch.keys())}")
|
||||
print(f"Batch size: {batch['google_img'].shape[0]}")
|
||||
print(f"Image shape: {batch['google_img'].shape[1:]}")
|
||||
|
||||
return train_loader, val_loader
|
||||
|
||||
|
||||
def example_training_config():
|
||||
"""Example 4: Setting up training configuration."""
|
||||
print("\n" + "=" * 60)
|
||||
print("Example 4: Training configuration")
|
||||
print("=" * 60)
|
||||
|
||||
# Training configuration
|
||||
config = {
|
||||
# Model config
|
||||
"model_type": "cnn",
|
||||
"hidden_channels": 64,
|
||||
"num_blocks": 4,
|
||||
"dropout_rate": 0.3,
|
||||
"use_batch_norm": True,
|
||||
"image_size": [256, 256],
|
||||
# Training config
|
||||
"epochs": 50,
|
||||
"batch_size": 16,
|
||||
"learning_rate": 1e-3,
|
||||
"weight_decay": 1e-4,
|
||||
"optimizer": "adam",
|
||||
"scheduler": "plateau",
|
||||
"grad_clip": 1.0,
|
||||
# Loss config
|
||||
"matrix_weight": 1.0,
|
||||
"geometric_weight": 0.5,
|
||||
"reg_weight": 0.1,
|
||||
"grid_size": 8,
|
||||
# Data config
|
||||
"data_dir": r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
"train_split": 0.8,
|
||||
"num_workers": 0,
|
||||
# Output config
|
||||
"output_dir": "runs/example_training",
|
||||
"seed": 42,
|
||||
}
|
||||
|
||||
print("Training configuration:")
|
||||
for key, value in config.items():
|
||||
print(f" {key}: {value}")
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def example_inference():
|
||||
"""Example 5: Performing inference with a trained model."""
|
||||
print("\n" + "=" * 60)
|
||||
print("Example 5: Inference example")
|
||||
print("=" * 60)
|
||||
|
||||
# Note: This example assumes you have a trained model
|
||||
# For demonstration, we'll show the code structure
|
||||
|
||||
print("Inference workflow:")
|
||||
print("1. Load trained model")
|
||||
print("2. Preprocess input images")
|
||||
print("3. Predict homography matrix")
|
||||
print("4. Visualize alignment")
|
||||
|
||||
# Example code structure (commented out since we don't have a trained model yet)
|
||||
"""
|
||||
# Create inference object
|
||||
inference = HomographyInference(
|
||||
model_path="runs/homography/checkpoint_best.pth",
|
||||
device="cuda" if torch.cuda.is_available() else "cpu",
|
||||
)
|
||||
|
||||
# Load images
|
||||
google_img = Image.open("path/to/google.png")
|
||||
yandex_img = Image.open("path/to/yandex.png")
|
||||
|
||||
# Predict homography
|
||||
homography = inference.predict(google_img, yandex_img)
|
||||
|
||||
print(f"Predicted homography matrix:")
|
||||
print(homography.cpu().numpy())
|
||||
|
||||
# Visualize alignment
|
||||
inference.visualize_alignment(
|
||||
google_img,
|
||||
yandex_img,
|
||||
homography.cpu().numpy(),
|
||||
save_path="alignment_visualization.png",
|
||||
show=True,
|
||||
)
|
||||
"""
|
||||
|
||||
print(
|
||||
"\nNote: To run actual inference, first train a model using train_homography.py"
|
||||
)
|
||||
|
||||
|
||||
def example_quick_start():
|
||||
"""Example 6: Quick start guide."""
|
||||
print("\n" + "=" * 60)
|
||||
print("Example 6: Quick Start Guide")
|
||||
print("=" * 60)
|
||||
|
||||
print("\nTo get started with homography estimation:")
|
||||
print("\n1. First, explore the dataset:")
|
||||
print(
|
||||
' python -c "from models.homography import HomographyDataset; '
|
||||
"dataset = HomographyDataset(r'C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images'); "
|
||||
"print(f'Found {len(dataset)} image pairs')\""
|
||||
)
|
||||
|
||||
print("\n2. Train a model:")
|
||||
print(" python models/train_homography.py \\")
|
||||
print(
|
||||
' --data_dir "C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images" \\'
|
||||
)
|
||||
print(" --epochs 50 \\")
|
||||
print(" --batch_size 16 \\")
|
||||
print(' --output_dir "runs/my_experiment"')
|
||||
|
||||
print("\n3. Perform inference on a single image pair:")
|
||||
print(" python models/infer_homography.py \\")
|
||||
print(' --model_path "runs/my_experiment/checkpoint_best.pth" \\')
|
||||
print(" --mode single \\")
|
||||
print(' --google_path "path/to/google.png" \\')
|
||||
print(' --yandex_path "path/to/yandex.png" \\')
|
||||
print(' --output_vis "alignment_result.png"')
|
||||
|
||||
print("\n4. Evaluate on the entire dataset:")
|
||||
print(" python models/infer_homography.py \\")
|
||||
print(' --model_path "runs/my_experiment/checkpoint_best.pth" \\')
|
||||
print(" --mode dataset \\")
|
||||
print(
|
||||
' --dataset_dir "C:\\Users\\admin\\Projects\\autopilot\\datasets\\ya_go_maps\\images" \\'
|
||||
)
|
||||
print(' --save_results "evaluation_results.json"')
|
||||
|
||||
|
||||
def main():
|
||||
"""Run all examples."""
|
||||
print("Homography Estimation Workflow Examples")
|
||||
print("=" * 60)
|
||||
|
||||
try:
|
||||
# Example 1: Dataset loading
|
||||
dataset = example_dataset_loading()
|
||||
|
||||
# Example 2: Model creation
|
||||
model, loss_fn = example_model_creation()
|
||||
|
||||
# Example 3: Data loaders
|
||||
train_loader, val_loader = example_data_loaders()
|
||||
|
||||
# Example 4: Training configuration
|
||||
config = example_training_config()
|
||||
|
||||
# Example 5: Inference
|
||||
example_inference()
|
||||
|
||||
# Example 6: Quick start
|
||||
example_quick_start()
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("All examples completed successfully!")
|
||||
print("=" * 60)
|
||||
|
||||
print("\nNext steps:")
|
||||
print("1. Run the training script to train a model")
|
||||
print("2. Use the inference script to test the trained model")
|
||||
print("3. Integrate the homography estimation into your autopilot system")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\nError running examples: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
1679
models/SiaN/homography.ipynb
Normal file
1679
models/SiaN/homography.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
434
models/SiaN/homography.py
Normal file
434
models/SiaN/homography.py
Normal file
@@ -0,0 +1,434 @@
|
||||
import os
|
||||
import random
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
|
||||
|
||||
class HomographyDataset(Dataset):
|
||||
"""
|
||||
Dataset for homography estimation between Yandex and Google map image pairs.
|
||||
|
||||
This dataset loads pairs of images (Yandex and Google maps) and provides
|
||||
homography matrices for data augmentation and training.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
root_dir: str,
|
||||
transform=None,
|
||||
augment: bool = True,
|
||||
max_samples: Optional[int] = None,
|
||||
image_size: Tuple[int, int] = (700, 700),
|
||||
cache_homographies: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the HomographyDataset.
|
||||
|
||||
Args:
|
||||
root_dir: Directory containing image pairs (format: {idx:04d}_google.png, {idx:04d}_yandex.png)
|
||||
transform: Optional torchvision transforms to apply
|
||||
augment: Whether to apply homography-based data augmentation
|
||||
max_samples: Maximum number of samples to load (None for all)
|
||||
image_size: Target size for images (height, width)
|
||||
cache_homographies: Whether to cache generated homography matrices to disk
|
||||
"""
|
||||
self.root_dir = root_dir
|
||||
self.transform = transform
|
||||
self.augment = augment
|
||||
self.image_size = image_size
|
||||
self.cache_homographies = cache_homographies
|
||||
|
||||
# Find all image pairs
|
||||
self.image_pairs = self._discover_image_pairs()
|
||||
|
||||
if max_samples is not None:
|
||||
self.image_pairs = self.image_pairs[:max_samples]
|
||||
|
||||
print(f"Found {len(self.image_pairs)} image pairs in {root_dir}")
|
||||
|
||||
# Create directory for cached homographies if needed
|
||||
if cache_homographies:
|
||||
self.homography_cache_dir = os.path.join(root_dir, "homography_cache")
|
||||
os.makedirs(self.homography_cache_dir, exist_ok=True)
|
||||
|
||||
def _discover_image_pairs(self) -> List[Dict[str, Any]]:
|
||||
"""Discover all Google-Yandex image pairs in the dataset directory."""
|
||||
image_pairs = []
|
||||
|
||||
# Get all Google images
|
||||
google_files = [
|
||||
f for f in os.listdir(self.root_dir) if f.endswith("_google.png")
|
||||
]
|
||||
|
||||
for google_file in sorted(google_files):
|
||||
# Extract index from filename
|
||||
idx_str = google_file.split("_")[0]
|
||||
try:
|
||||
idx = int(idx_str)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# Check if corresponding Yandex image exists
|
||||
yandex_file = f"{idx:04d}_yandex.png"
|
||||
yandex_path = os.path.join(self.root_dir, yandex_file)
|
||||
|
||||
if os.path.exists(yandex_path):
|
||||
image_pairs.append(
|
||||
{
|
||||
"idx": idx,
|
||||
"google_path": os.path.join(self.root_dir, google_file),
|
||||
"yandex_path": yandex_path,
|
||||
}
|
||||
)
|
||||
|
||||
return image_pairs
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""Return the number of image pairs in the dataset."""
|
||||
return len(self.image_pairs)
|
||||
|
||||
def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
|
||||
"""
|
||||
Get a sample from the dataset.
|
||||
|
||||
Returns a dictionary with:
|
||||
- 'google_img': Google map image tensor
|
||||
- 'yandex_img': Yandex map image tensor
|
||||
- 'homography': Ground truth homography matrix (3x3)
|
||||
- 'idx': Sample index
|
||||
"""
|
||||
pair_info = self.image_pairs[idx]
|
||||
|
||||
# Load images
|
||||
google_img = Image.open(pair_info["google_path"]).convert("RGB")
|
||||
yandex_img = Image.open(pair_info["yandex_path"]).convert("RGB")
|
||||
|
||||
# Resize images to target size
|
||||
google_img = google_img.resize(
|
||||
(self.image_size[1], self.image_size[0]), Image.BILINEAR
|
||||
)
|
||||
yandex_img = yandex_img.resize(
|
||||
(self.image_size[1], self.image_size[0]), Image.BILINEAR
|
||||
)
|
||||
|
||||
# Get or generate homography matrix
|
||||
homography_matrix = self._get_homography_matrix(pair_info["idx"])
|
||||
|
||||
# Apply data augmentation if enabled
|
||||
if self.augment:
|
||||
google_img, yandex_img, homography_matrix = self._apply_augmentation(
|
||||
google_img, yandex_img, homography_matrix
|
||||
)
|
||||
|
||||
# Convert images to tensors
|
||||
if self.transform:
|
||||
google_img = self.transform(google_img)
|
||||
yandex_img = self.transform(yandex_img)
|
||||
else:
|
||||
# Default conversion to tensor
|
||||
google_img = (
|
||||
torch.from_numpy(np.array(google_img)).float().permute(2, 0, 1) / 255.0
|
||||
)
|
||||
yandex_img = (
|
||||
torch.from_numpy(np.array(yandex_img)).float().permute(2, 0, 1) / 255.0
|
||||
)
|
||||
|
||||
# Convert homography to tensor
|
||||
homography_tensor = torch.from_numpy(homography_matrix).float()
|
||||
|
||||
return {
|
||||
"google_img": google_img,
|
||||
"yandex_img": yandex_img,
|
||||
"homography": homography_tensor,
|
||||
"idx": torch.tensor(pair_info["idx"], dtype=torch.long),
|
||||
}
|
||||
|
||||
def _get_homography_matrix(self, idx: int) -> np.ndarray:
|
||||
"""
|
||||
Get homography matrix for a given index.
|
||||
|
||||
If cached homography exists, load it. Otherwise generate a new one.
|
||||
"""
|
||||
if self.cache_homographies:
|
||||
cache_path = os.path.join(
|
||||
self.homography_cache_dir, f"{idx:04d}_homography.npy"
|
||||
)
|
||||
if os.path.exists(cache_path):
|
||||
return np.load(cache_path)
|
||||
|
||||
# Generate new homography matrix
|
||||
homography_matrix = self.generate_random_homography()
|
||||
|
||||
# Cache if enabled
|
||||
if self.cache_homographies:
|
||||
np.save(cache_path, homography_matrix)
|
||||
|
||||
return homography_matrix
|
||||
|
||||
def generate_random_homography(self) -> np.ndarray:
|
||||
"""
|
||||
Generate a random homography matrix for data augmentation.
|
||||
|
||||
Returns:
|
||||
np.ndarray: 3x3 homography matrix.
|
||||
"""
|
||||
# Generate random affine transformation parameters
|
||||
angle = np.random.uniform(-30, 30) # rotation in degrees
|
||||
scale = np.random.uniform(0.8, 1.2) # scaling factor
|
||||
tx = np.random.uniform(-50, 50) # translation in x
|
||||
ty = np.random.uniform(-50, 50) # translation in y
|
||||
|
||||
# Convert angle to radians
|
||||
theta = np.radians(angle)
|
||||
|
||||
# Create affine transformation matrix
|
||||
affine_matrix = np.array(
|
||||
[
|
||||
[scale * np.cos(theta), -scale * np.sin(theta), tx],
|
||||
[scale * np.sin(theta), scale * np.cos(theta), ty],
|
||||
[0, 0, 1],
|
||||
]
|
||||
)
|
||||
|
||||
# Add small perspective distortion
|
||||
perspective = np.random.uniform(-0.001, 0.001, (2, 3))
|
||||
perspective = np.vstack([perspective, [0, 0, 0]])
|
||||
|
||||
homography_matrix = affine_matrix + perspective
|
||||
|
||||
return homography_matrix
|
||||
|
||||
def _apply_augmentation(
|
||||
self,
|
||||
google_img: Image.Image,
|
||||
yandex_img: Image.Image,
|
||||
base_homography: np.ndarray,
|
||||
) -> Tuple[Image.Image, Image.Image, np.ndarray]:
|
||||
"""
|
||||
Apply homography-based data augmentation to image pair.
|
||||
|
||||
Args:
|
||||
google_img: Google map image
|
||||
yandex_img: Yandex map image
|
||||
base_homography: Base homography matrix
|
||||
|
||||
Returns:
|
||||
Tuple of (augmented_google_img, augmented_yandex_img, augmented_homography)
|
||||
"""
|
||||
# Generate augmentation homography
|
||||
aug_homography = self.generate_random_homography()
|
||||
|
||||
# Combine with base homography
|
||||
combined_homography = aug_homography @ base_homography
|
||||
|
||||
# Apply augmentation to both images
|
||||
google_aug = self._apply_homography_to_image(google_img, aug_homography)
|
||||
yandex_aug = self._apply_homography_to_image(yandex_img, aug_homography)
|
||||
|
||||
return google_aug, yandex_aug, combined_homography
|
||||
|
||||
def _apply_homography_to_image(
|
||||
self, img: Image.Image, homography: np.ndarray
|
||||
) -> Image.Image:
|
||||
"""
|
||||
Apply homography transformation to a single image.
|
||||
|
||||
Args:
|
||||
img: PIL Image to transform
|
||||
homography: 3x3 homography matrix
|
||||
|
||||
Returns:
|
||||
Transformed PIL Image
|
||||
"""
|
||||
# Convert to numpy array
|
||||
img_np = np.array(img)
|
||||
|
||||
# Get image dimensions
|
||||
h, w = img_np.shape[:2]
|
||||
|
||||
# Apply homography transformation
|
||||
transformed = cv2.warpPerspective(
|
||||
img_np,
|
||||
homography,
|
||||
(w, h),
|
||||
flags=cv2.INTER_LINEAR,
|
||||
borderMode=cv2.BORDER_REFLECT,
|
||||
)
|
||||
|
||||
# Convert back to PIL Image
|
||||
return Image.fromarray(transformed)
|
||||
|
||||
def get_sample_without_augmentation(self, idx: int) -> Dict[str, Any]:
|
||||
"""
|
||||
Get a sample without data augmentation.
|
||||
|
||||
Useful for visualization and evaluation.
|
||||
"""
|
||||
pair_info = self.image_pairs[idx]
|
||||
|
||||
# Load images
|
||||
google_img = Image.open(pair_info["google_path"]).convert("RGB")
|
||||
yandex_img = Image.open(pair_info["yandex_path"]).convert("RGB")
|
||||
|
||||
# Resize
|
||||
google_img = google_img.resize(
|
||||
(self.image_size[1], self.image_size[0]), Image.BILINEAR
|
||||
)
|
||||
yandex_img = yandex_img.resize(
|
||||
(self.image_size[1], self.image_size[0]), Image.BILINEAR
|
||||
)
|
||||
|
||||
# Get homography matrix
|
||||
homography_matrix = self._get_homography_matrix(pair_info["idx"])
|
||||
|
||||
return {
|
||||
"google_img": google_img,
|
||||
"yandex_img": yandex_img,
|
||||
"homography": homography_matrix,
|
||||
"idx": pair_info["idx"],
|
||||
"google_path": pair_info["google_path"],
|
||||
"yandex_path": pair_info["yandex_path"],
|
||||
}
|
||||
|
||||
|
||||
def create_data_loaders(
|
||||
root_dir: str,
|
||||
batch_size: int = 32,
|
||||
train_split: float = 0.8,
|
||||
num_workers: int = 4,
|
||||
image_size: Tuple[int, int] = (256, 256),
|
||||
augment_train: bool = True,
|
||||
augment_val: bool = False,
|
||||
) -> Tuple[DataLoader, DataLoader]:
|
||||
"""
|
||||
Create train and validation data loaders for homography estimation.
|
||||
|
||||
Args:
|
||||
root_dir: Directory containing image pairs
|
||||
batch_size: Batch size for data loaders
|
||||
train_split: Fraction of data to use for training
|
||||
num_workers: Number of worker processes for data loading
|
||||
image_size: Target image size (height, width)
|
||||
augment_train: Whether to augment training data
|
||||
augment_val: Whether to augment validation data
|
||||
|
||||
Returns:
|
||||
Tuple of (train_loader, val_loader)
|
||||
"""
|
||||
from torchvision import transforms
|
||||
|
||||
# Define transforms
|
||||
transform = transforms.Compose(
|
||||
[
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
||||
]
|
||||
)
|
||||
|
||||
# Create full dataset
|
||||
full_dataset = HomographyDataset(
|
||||
root_dir=root_dir,
|
||||
transform=transform,
|
||||
augment=False, # We'll handle augmentation separately
|
||||
image_size=image_size,
|
||||
cache_homographies=True,
|
||||
)
|
||||
|
||||
# Split dataset
|
||||
dataset_size = len(full_dataset)
|
||||
train_size = int(train_split * dataset_size)
|
||||
val_size = dataset_size - train_size
|
||||
|
||||
# Create indices for splitting
|
||||
indices = list(range(dataset_size))
|
||||
random.shuffle(indices)
|
||||
train_indices = indices[:train_size]
|
||||
val_indices = indices[train_size:]
|
||||
|
||||
# Create subset samplers
|
||||
from torch.utils.data import Subset
|
||||
|
||||
train_dataset = Subset(full_dataset, train_indices)
|
||||
val_dataset = Subset(full_dataset, val_indices)
|
||||
|
||||
# Apply augmentation by overriding __getitem__ for train dataset
|
||||
if augment_train:
|
||||
|
||||
class AugmentedSubset(Subset):
|
||||
def __getitem__(self, idx):
|
||||
sample = self.dataset[self.indices[idx]]
|
||||
# Apply augmentation
|
||||
google_img = sample["google_img"]
|
||||
yandex_img = sample["yandex_img"]
|
||||
homography = sample["homography"]
|
||||
|
||||
# Generate augmentation homography
|
||||
aug_homography = torch.from_numpy(
|
||||
full_dataset.generate_random_homography()
|
||||
).float()
|
||||
|
||||
# Combine homographies
|
||||
combined_homography = aug_homography @ homography
|
||||
|
||||
# Apply augmentation (simplified - in practice would warp images)
|
||||
# For now, we just return the combined homography
|
||||
return {
|
||||
"google_img": google_img,
|
||||
"yandex_img": yandex_img,
|
||||
"homography": combined_homography,
|
||||
"idx": sample["idx"],
|
||||
}
|
||||
|
||||
train_dataset = AugmentedSubset(full_dataset, train_indices)
|
||||
|
||||
# Create data loaders
|
||||
train_loader = DataLoader(
|
||||
train_dataset,
|
||||
batch_size=batch_size,
|
||||
shuffle=True,
|
||||
num_workers=num_workers,
|
||||
pin_memory=True,
|
||||
)
|
||||
|
||||
val_loader = DataLoader(
|
||||
val_dataset,
|
||||
batch_size=batch_size,
|
||||
shuffle=False,
|
||||
num_workers=num_workers,
|
||||
pin_memory=True,
|
||||
)
|
||||
|
||||
return train_loader, val_loader
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Example usage
|
||||
dataset = HomographyDataset(
|
||||
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
augment=True,
|
||||
image_size=(256, 256),
|
||||
)
|
||||
|
||||
print(f"Dataset size: {len(dataset)}")
|
||||
|
||||
# Get a sample
|
||||
sample = dataset[0]
|
||||
print(f"Sample keys: {list(sample.keys())}")
|
||||
print(f"Google image shape: {sample['google_img'].shape}")
|
||||
print(f"Yandex image shape: {sample['yandex_img'].shape}")
|
||||
print(f"Homography shape: {sample['homography'].shape}")
|
||||
|
||||
# Create data loaders
|
||||
train_loader, val_loader = create_data_loaders(
|
||||
root_dir=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
batch_size=16,
|
||||
train_split=0.8,
|
||||
)
|
||||
|
||||
print(f"Train batches: {len(train_loader)}")
|
||||
print(f"Val batches: {len(val_loader)}")
|
||||
551
models/SiaN/homography_cnn.py
Normal file
551
models/SiaN/homography_cnn.py
Normal file
@@ -0,0 +1,551 @@
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class HomographyCNN(nn.Module):
|
||||
"""
|
||||
CNN model for homography estimation between two images.
|
||||
|
||||
This model takes two images (Google and Yandex maps) as input and
|
||||
outputs a 3x3 homography matrix that transforms one image to align with the other.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_channels: int = 3,
|
||||
hidden_channels: int = 64,
|
||||
num_blocks: int = 4,
|
||||
dropout_rate: float = 0.3,
|
||||
use_batch_norm: bool = True,
|
||||
output_size: int = 9, # Flattened 3x3 homography matrix
|
||||
):
|
||||
"""
|
||||
Initialize the HomographyCNN model.
|
||||
|
||||
Args:
|
||||
input_channels: Number of input channels per image (3 for RGB)
|
||||
hidden_channels: Base number of channels in the network
|
||||
num_blocks: Number of convolutional blocks
|
||||
dropout_rate: Dropout rate for regularization
|
||||
use_batch_norm: Whether to use batch normalization
|
||||
output_size: Size of output vector (9 for flattened 3x3 matrix)
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
self.input_channels = input_channels
|
||||
self.hidden_channels = hidden_channels
|
||||
self.num_blocks = num_blocks
|
||||
self.dropout_rate = dropout_rate
|
||||
self.use_batch_norm = use_batch_norm
|
||||
|
||||
# Feature extraction for each image separately
|
||||
self.google_encoder = self._build_encoder()
|
||||
self.yandex_encoder = self._build_encoder()
|
||||
|
||||
# Fusion layers to combine features from both images
|
||||
self.fusion_layers = self._build_fusion_layers()
|
||||
|
||||
# Regression head for homography estimation
|
||||
self.regression_head = self._build_regression_head(output_size)
|
||||
|
||||
# Initialize weights
|
||||
self._initialize_weights()
|
||||
|
||||
def _build_encoder(self) -> nn.Module:
|
||||
"""Build the encoder network for a single image."""
|
||||
layers = []
|
||||
in_channels = self.input_channels
|
||||
out_channels = self.hidden_channels
|
||||
|
||||
# First convolutional block
|
||||
layers.append(
|
||||
nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=2, padding=3)
|
||||
)
|
||||
if self.use_batch_norm:
|
||||
layers.append(nn.BatchNorm2d(out_channels))
|
||||
layers.append(nn.ReLU(inplace=True))
|
||||
layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
|
||||
|
||||
# Additional convolutional blocks
|
||||
for i in range(self.num_blocks):
|
||||
block_in_channels = out_channels
|
||||
block_out_channels = out_channels * 2 if i < 2 else out_channels
|
||||
|
||||
layers.append(
|
||||
ResidualBlock(
|
||||
in_channels=block_in_channels,
|
||||
out_channels=block_out_channels,
|
||||
stride=1 if i == 0 else 2,
|
||||
dropout_rate=self.dropout_rate,
|
||||
use_batch_norm=self.use_batch_norm,
|
||||
)
|
||||
)
|
||||
|
||||
if i < 2:
|
||||
out_channels = block_out_channels
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _build_fusion_layers(self) -> nn.Module:
|
||||
"""Build layers to fuse features from both images."""
|
||||
# After encoding, each image has hidden_channels * 4 features
|
||||
fused_channels = (
|
||||
self.hidden_channels * 8
|
||||
) # Concatenated features from both images
|
||||
|
||||
layers = [
|
||||
# Reduce dimensionality
|
||||
nn.Conv2d(
|
||||
fused_channels, self.hidden_channels * 4, kernel_size=3, padding=1
|
||||
),
|
||||
nn.BatchNorm2d(self.hidden_channels * 4)
|
||||
if self.use_batch_norm
|
||||
else nn.Identity(),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout2d(self.dropout_rate),
|
||||
# Further processing
|
||||
nn.Conv2d(
|
||||
self.hidden_channels * 4,
|
||||
self.hidden_channels * 2,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
),
|
||||
nn.BatchNorm2d(self.hidden_channels * 2)
|
||||
if self.use_batch_norm
|
||||
else nn.Identity(),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout2d(self.dropout_rate),
|
||||
# Global pooling
|
||||
nn.AdaptiveAvgPool2d((1, 1)),
|
||||
]
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _build_regression_head(self, output_size: int) -> nn.Module:
|
||||
"""Build the regression head for homography estimation."""
|
||||
# Input size after fusion and global pooling
|
||||
input_features = self.hidden_channels * 2
|
||||
|
||||
layers = [
|
||||
nn.Flatten(),
|
||||
nn.Linear(input_features, 512),
|
||||
nn.BatchNorm1d(512) if self.use_batch_norm else nn.Identity(),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout(self.dropout_rate),
|
||||
nn.Linear(512, 256),
|
||||
nn.BatchNorm1d(256) if self.use_batch_norm else nn.Identity(),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout(self.dropout_rate),
|
||||
nn.Linear(256, 128),
|
||||
nn.BatchNorm1d(128) if self.use_batch_norm else nn.Identity(),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout(self.dropout_rate),
|
||||
nn.Linear(128, output_size),
|
||||
]
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _initialize_weights(self):
|
||||
"""Initialize model weights."""
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
nn.init.normal_(m.weight, 0, 0.01)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
google_img: torch.Tensor,
|
||||
yandex_img: torch.Tensor,
|
||||
return_matrix: bool = True,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Forward pass of the model.
|
||||
|
||||
Args:
|
||||
google_img: Google map image tensor of shape (B, C, H, W)
|
||||
yandex_img: Yandex map image tensor of shape (B, C, H, W)
|
||||
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
|
||||
|
||||
Returns:
|
||||
Homography matrix tensor of shape (B, 3, 3) or flattened vector of shape (B, 9)
|
||||
"""
|
||||
# Extract features from both images
|
||||
google_features = self.google_encoder(google_img)
|
||||
yandex_features = self.yandex_encoder(yandex_img)
|
||||
|
||||
# Concatenate features along channel dimension
|
||||
combined_features = torch.cat([google_features, yandex_features], dim=1)
|
||||
|
||||
# Fuse features
|
||||
fused_features = self.fusion_layers(combined_features)
|
||||
|
||||
# Regression to get homography parameters
|
||||
homography_flat = self.regression_head(fused_features)
|
||||
|
||||
if return_matrix:
|
||||
# Reshape to 3x3 matrix
|
||||
batch_size = homography_flat.shape[0]
|
||||
homography_matrix = homography_flat.view(batch_size, 3, 3)
|
||||
|
||||
# Ensure the last element is 1 (homogeneous coordinate normalization)
|
||||
# Add small epsilon to prevent division by zero
|
||||
epsilon = 1e-8
|
||||
homography_matrix = homography_matrix / (
|
||||
homography_matrix[:, 2, 2].view(-1, 1, 1) + epsilon
|
||||
)
|
||||
|
||||
return homography_matrix
|
||||
else:
|
||||
return homography_flat
|
||||
|
||||
def predict_homography(
|
||||
self,
|
||||
google_img: torch.Tensor,
|
||||
yandex_img: torch.Tensor,
|
||||
normalize: bool = True,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Predict homography matrix with optional normalization.
|
||||
|
||||
Args:
|
||||
google_img: Google map image tensor
|
||||
yandex_img: Yandex map image tensor
|
||||
normalize: Whether to normalize the homography matrix
|
||||
|
||||
Returns:
|
||||
Predicted homography matrix
|
||||
"""
|
||||
self.eval()
|
||||
with torch.no_grad():
|
||||
homography = self.forward(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
if normalize:
|
||||
# Normalize so that last element is 1
|
||||
homography = homography / homography[:, 2, 2].view(-1, 1, 1)
|
||||
|
||||
return homography
|
||||
|
||||
|
||||
class ResidualBlock(nn.Module):
|
||||
"""Residual block with optional downsampling."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
stride: int = 1,
|
||||
dropout_rate: float = 0.3,
|
||||
use_batch_norm: bool = True,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.conv1 = nn.Conv2d(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
bias=False,
|
||||
)
|
||||
self.bn1 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
|
||||
self.relu1 = nn.ReLU(inplace=True)
|
||||
self.dropout1 = (
|
||||
nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
|
||||
)
|
||||
|
||||
self.conv2 = nn.Conv2d(
|
||||
out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
|
||||
)
|
||||
self.bn2 = nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity()
|
||||
self.relu2 = nn.ReLU(inplace=True)
|
||||
self.dropout2 = (
|
||||
nn.Dropout2d(dropout_rate) if dropout_rate > 0 else nn.Identity()
|
||||
)
|
||||
|
||||
# Shortcut connection
|
||||
self.shortcut = nn.Sequential()
|
||||
if stride != 1 or in_channels != out_channels:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(
|
||||
in_channels, out_channels, kernel_size=1, stride=stride, bias=False
|
||||
),
|
||||
nn.BatchNorm2d(out_channels) if use_batch_norm else nn.Identity(),
|
||||
)
|
||||
|
||||
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||
identity = self.shortcut(x)
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu1(out)
|
||||
out = self.dropout1(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
out += identity
|
||||
out = self.relu2(out)
|
||||
out = self.dropout2(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class HomographyLoss(nn.Module):
|
||||
"""
|
||||
Custom loss function for homography estimation.
|
||||
|
||||
Combines multiple loss terms:
|
||||
1. Matrix element-wise L2 loss
|
||||
2. Geometric consistency loss (warping error)
|
||||
3. Determinant regularization (to prevent degenerate matrices)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
matrix_weight: float = 1.0,
|
||||
geometric_weight: float = 0.5,
|
||||
reg_weight: float = 0.1,
|
||||
grid_size: int = 8,
|
||||
):
|
||||
super().__init__()
|
||||
self.matrix_weight = matrix_weight
|
||||
self.geometric_weight = geometric_weight
|
||||
self.reg_weight = reg_weight
|
||||
self.grid_size = grid_size
|
||||
|
||||
# Create grid of points for geometric loss
|
||||
self.register_buffer(
|
||||
"grid_points",
|
||||
self._create_grid_points(grid_size),
|
||||
persistent=False,
|
||||
)
|
||||
|
||||
def _create_grid_points(self, grid_size: int) -> torch.Tensor:
|
||||
"""Create a grid of points for geometric consistency loss."""
|
||||
x = torch.linspace(-1, 1, grid_size)
|
||||
y = torch.linspace(-1, 1, grid_size)
|
||||
grid_y, grid_x = torch.meshgrid(y, x, indexing="ij")
|
||||
grid_points = torch.stack([grid_x.flatten(), grid_y.flatten()], dim=1)
|
||||
# Add homogeneous coordinate
|
||||
ones = torch.ones(grid_points.shape[0], 1)
|
||||
grid_points = torch.cat([grid_points, ones], dim=1)
|
||||
return grid_points.T # Shape: (3, grid_size*grid_size)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
pred_homography: torch.Tensor,
|
||||
target_homography: torch.Tensor,
|
||||
google_img: Optional[torch.Tensor] = None,
|
||||
yandex_img: Optional[torch.Tensor] = None,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Compute homography loss.
|
||||
|
||||
Args:
|
||||
pred_homography: Predicted homography matrices (B, 3, 3)
|
||||
target_homography: Target homography matrices (B, 3, 3)
|
||||
google_img: Google images (optional, for geometric loss)
|
||||
yandex_img: Yandex images (optional, for geometric loss)
|
||||
|
||||
Returns:
|
||||
Combined loss value
|
||||
"""
|
||||
batch_size = pred_homography.shape[0]
|
||||
|
||||
# 1. Matrix element-wise L2 loss
|
||||
matrix_loss = F.mse_loss(pred_homography, target_homography)
|
||||
|
||||
# 2. Geometric consistency loss (if images provided)
|
||||
geometric_loss = torch.tensor(0.0, device=pred_homography.device)
|
||||
if google_img is not None and yandex_img is not None:
|
||||
# Warp grid points with predicted homography
|
||||
grid_points = self.grid_points.unsqueeze(0).expand(batch_size, -1, -1)
|
||||
warped_points = torch.bmm(pred_homography, grid_points)
|
||||
|
||||
# Normalize homogeneous coordinates
|
||||
warped_points = warped_points / (warped_points[:, 2:3, :] + 1e-8)
|
||||
|
||||
# Warp with target homography for comparison
|
||||
target_warped_points = torch.bmm(target_homography, grid_points)
|
||||
target_warped_points = target_warped_points / (
|
||||
target_warped_points[:, 2:3, :] + 1e-8
|
||||
)
|
||||
|
||||
# Compute point-wise distance
|
||||
geometric_loss = F.mse_loss(
|
||||
warped_points[:, :2, :], target_warped_points[:, :2, :]
|
||||
)
|
||||
|
||||
# 3. Regularization loss (prevent degenerate matrices)
|
||||
# Encourage determinant to be close to 1
|
||||
pred_det = torch.det(pred_homography)
|
||||
reg_loss = F.mse_loss(pred_det, torch.ones_like(pred_det))
|
||||
|
||||
# Combine losses
|
||||
total_loss = (
|
||||
self.matrix_weight * matrix_loss
|
||||
+ self.geometric_weight * geometric_loss
|
||||
+ self.reg_weight * reg_loss
|
||||
)
|
||||
|
||||
return total_loss
|
||||
|
||||
def compute_metrics(
|
||||
self,
|
||||
pred_homography: torch.Tensor,
|
||||
target_homography: torch.Tensor,
|
||||
) -> dict:
|
||||
"""
|
||||
Compute evaluation metrics for homography estimation.
|
||||
|
||||
Args:
|
||||
pred_homography: Predicted homography matrices
|
||||
target_homography: Target homography matrices
|
||||
|
||||
Returns:
|
||||
Dictionary of metrics
|
||||
"""
|
||||
with torch.no_grad():
|
||||
# Normalize matrices
|
||||
pred_norm = pred_homography / pred_homography[:, 2, 2].view(-1, 1, 1)
|
||||
target_norm = target_homography / target_homography[:, 2, 2].view(-1, 1, 1)
|
||||
|
||||
# Matrix L2 error
|
||||
matrix_error = F.mse_loss(pred_norm, target_norm, reduction="none").mean(
|
||||
dim=(1, 2)
|
||||
)
|
||||
|
||||
# Corner error (warp 4 corners of the image)
|
||||
corners = torch.tensor(
|
||||
[[-1, -1, 1], [1, -1, 1], [1, 1, 1], [-1, 1, 1]],
|
||||
dtype=torch.float32,
|
||||
device=pred_homography.device,
|
||||
).T # Shape: (3, 4)
|
||||
|
||||
corners = corners.unsqueeze(0).expand(pred_homography.shape[0], -1, -1)
|
||||
|
||||
pred_corners = torch.bmm(pred_norm, corners)
|
||||
pred_corners = pred_corners / (pred_corners[:, 2:3, :] + 1e-8)
|
||||
|
||||
target_corners = torch.bmm(target_norm, corners)
|
||||
target_corners = target_corners / (target_corners[:, 2:3, :] + 1e-8)
|
||||
|
||||
corner_error = torch.mean(
|
||||
torch.norm(pred_corners[:, :2, :] - target_corners[:, :2, :], dim=1),
|
||||
dim=1,
|
||||
)
|
||||
|
||||
# Average corner error in pixels (assuming image coordinates in [-1, 1])
|
||||
# Convert to pixel error if image size is known
|
||||
avg_corner_error = corner_error.mean().item()
|
||||
|
||||
return {
|
||||
"matrix_mse": matrix_error.mean().item(),
|
||||
"corner_error": avg_corner_error,
|
||||
"corner_error_px": avg_corner_error * 128, # Assuming 256x256 images
|
||||
}
|
||||
|
||||
|
||||
def create_homography_model(
|
||||
model_type: str = "cnn",
|
||||
input_size: Tuple[int, int] = (256, 256),
|
||||
**kwargs,
|
||||
) -> nn.Module:
|
||||
"""
|
||||
Factory function to create homography estimation model.
|
||||
|
||||
Args:
|
||||
model_type: Type of model to create ('cnn' or 'resnet')
|
||||
input_size: Input image size (height, width)
|
||||
**kwargs: Additional arguments passed to model constructor
|
||||
|
||||
Returns:
|
||||
Homography estimation model
|
||||
"""
|
||||
if model_type == "cnn":
|
||||
return HomographyCNN(**kwargs)
|
||||
else:
|
||||
raise ValueError(f"Unknown model type: {model_type}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Test the model
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
print(f"Using device: {device}")
|
||||
|
||||
# Create model
|
||||
model = HomographyCNN(
|
||||
input_channels=3,
|
||||
hidden_channels=64,
|
||||
num_blocks=4,
|
||||
dropout_rate=0.3,
|
||||
use_batch_norm=True,
|
||||
).to(device)
|
||||
|
||||
print(
|
||||
f"Model created with {sum(p.numel() for p in model.parameters()):,} parameters"
|
||||
)
|
||||
|
||||
# Create dummy input
|
||||
batch_size = 4
|
||||
height, width = 256, 256
|
||||
|
||||
google_img = torch.randn(batch_size, 3, height, width).to(device)
|
||||
yandex_img = torch.randn(batch_size, 3, height, width).to(device)
|
||||
|
||||
# Test forward pass
|
||||
print("\nTesting forward pass...")
|
||||
output = model(google_img, yandex_img, return_matrix=True)
|
||||
print(f"Output shape: {output.shape}") # Should be (4, 3, 3)
|
||||
print(f"Sample output:\n{output[0]}")
|
||||
|
||||
# Test prediction
|
||||
print("\nTesting prediction...")
|
||||
pred = model.predict_homography(google_img, yandex_img)
|
||||
print(f"Prediction shape: {pred.shape}")
|
||||
print(f"Last element (should be ~1): {pred[0, 2, 2]:.6f}")
|
||||
|
||||
# Test loss function
|
||||
print("\nTesting loss function...")
|
||||
target_homography = torch.eye(3).unsqueeze(0).expand(batch_size, -1, -1).to(device)
|
||||
loss_fn = HomographyLoss(
|
||||
matrix_weight=1.0,
|
||||
geometric_weight=0.5,
|
||||
reg_weight=0.1,
|
||||
grid_size=8,
|
||||
).to(device)
|
||||
|
||||
loss = loss_fn(output, target_homography, google_img, yandex_img)
|
||||
print(f"Loss value: {loss.item():.6f}")
|
||||
|
||||
# Test metrics
|
||||
print("\nTesting metrics...")
|
||||
metrics = loss_fn.compute_metrics(output, target_homography)
|
||||
for key, value in metrics.items():
|
||||
print(f"{key}: {value:.6f}")
|
||||
|
||||
# Test model factory
|
||||
print("\nTesting model factory...")
|
||||
model2 = create_homography_model(
|
||||
model_type="cnn",
|
||||
input_size=(256, 256),
|
||||
input_channels=3,
|
||||
hidden_channels=32,
|
||||
num_blocks=3,
|
||||
).to(device)
|
||||
|
||||
print(
|
||||
f"Model2 created with {sum(p.numel() for p in model2.parameters()):,} parameters"
|
||||
)
|
||||
|
||||
print("\nAll tests completed successfully!")
|
||||
553
models/SiaN/infer_homography.py
Normal file
553
models/SiaN/infer_homography.py
Normal file
@@ -0,0 +1,553 @@
|
||||
"""
|
||||
Inference script for homography estimation between Google and Yandex map images.
|
||||
|
||||
This script loads a trained homography estimation model and performs inference
|
||||
on new image pairs or the test dataset.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import torch
|
||||
from homography import HomographyDataset
|
||||
from homography_cnn import HomographyCNN, create_homography_model
|
||||
from PIL import Image
|
||||
from torchvision import transforms
|
||||
|
||||
|
||||
class HomographyInference:
|
||||
"""Class for performing inference with homography estimation model."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path: str,
|
||||
config_path: Optional[str] = None,
|
||||
device: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the inference class.
|
||||
|
||||
Args:
|
||||
model_path: Path to trained model checkpoint
|
||||
config_path: Path to model configuration file (optional)
|
||||
device: Device to run inference on ('cuda' or 'cpu')
|
||||
"""
|
||||
# Set device
|
||||
if device is None:
|
||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
print(f"Using device: {self.device}")
|
||||
|
||||
# Load configuration
|
||||
if config_path is None:
|
||||
# Try to find config in the same directory as model
|
||||
model_dir = Path(model_path).parent
|
||||
config_path = model_dir / "config.json"
|
||||
|
||||
if os.path.exists(config_path):
|
||||
with open(config_path, "r") as f:
|
||||
self.config = json.load(f)
|
||||
print(f"Loaded configuration from {config_path}")
|
||||
else:
|
||||
# Use default configuration
|
||||
self.config = {
|
||||
"image_size": [256, 256],
|
||||
"hidden_channels": 64,
|
||||
"num_blocks": 4,
|
||||
"dropout_rate": 0.3,
|
||||
"use_batch_norm": True,
|
||||
}
|
||||
print("Using default configuration")
|
||||
|
||||
# Create model
|
||||
self.model = self._create_model()
|
||||
self._load_model(model_path)
|
||||
|
||||
# Set up transforms
|
||||
self.transform = self._create_transforms()
|
||||
|
||||
# Set model to evaluation mode
|
||||
self.model.eval()
|
||||
|
||||
def _create_model(self) -> HomographyCNN:
|
||||
"""Create model based on configuration."""
|
||||
image_size = self.config.get("image_size", [256, 256])
|
||||
|
||||
model = create_homography_model(
|
||||
model_type="cnn",
|
||||
input_size=tuple(image_size),
|
||||
input_channels=3,
|
||||
hidden_channels=self.config.get("hidden_channels", 64),
|
||||
num_blocks=self.config.get("num_blocks", 4),
|
||||
dropout_rate=self.config.get("dropout_rate", 0.3),
|
||||
use_batch_norm=self.config.get("use_batch_norm", True),
|
||||
)
|
||||
|
||||
return model
|
||||
|
||||
def _load_model(self, model_path: str):
|
||||
"""Load model weights from checkpoint."""
|
||||
if not os.path.exists(model_path):
|
||||
raise FileNotFoundError(f"Model file not found: {model_path}")
|
||||
|
||||
# Load checkpoint
|
||||
checkpoint = torch.load(model_path, map_location=self.device)
|
||||
|
||||
if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
|
||||
# Trainer checkpoint format
|
||||
self.model.load_state_dict(checkpoint["model_state_dict"])
|
||||
else:
|
||||
# Raw model weights format
|
||||
self.model.load_state_dict(checkpoint)
|
||||
|
||||
self.model = self.model.to(self.device)
|
||||
print(f"Loaded model from {model_path}")
|
||||
|
||||
def _create_transforms(self):
|
||||
"""Create image transforms for inference."""
|
||||
return transforms.Compose(
|
||||
[
|
||||
transforms.Resize(tuple(self.config.get("image_size", [256, 256]))),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize(
|
||||
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
def preprocess_images(
|
||||
self, google_img: Image.Image, yandex_img: Image.Image
|
||||
) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Preprocess images for inference.
|
||||
|
||||
Args:
|
||||
google_img: Google map image (PIL Image)
|
||||
yandex_img: Yandex map image (PIL Image)
|
||||
|
||||
Returns:
|
||||
Tuple of preprocessed image tensors
|
||||
"""
|
||||
# Convert to RGB if needed
|
||||
if google_img.mode != "RGB":
|
||||
google_img = google_img.convert("RGB")
|
||||
if yandex_img.mode != "RGB":
|
||||
yandex_img = yandex_img.convert("RGB")
|
||||
|
||||
# Apply transforms
|
||||
google_tensor = self.transform(google_img).unsqueeze(0) # Add batch dimension
|
||||
yandex_tensor = self.transform(yandex_img).unsqueeze(0)
|
||||
|
||||
return google_tensor, yandex_tensor
|
||||
|
||||
def predict(
|
||||
self,
|
||||
google_img: Image.Image,
|
||||
yandex_img: Image.Image,
|
||||
return_matrix: bool = True,
|
||||
normalize: bool = True,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Predict homography matrix for image pair.
|
||||
|
||||
Args:
|
||||
google_img: Google map image (PIL Image)
|
||||
yandex_img: Yandex map image (PIL Image)
|
||||
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
|
||||
normalize: Whether to normalize the homography matrix
|
||||
|
||||
Returns:
|
||||
Predicted homography matrix or vector
|
||||
"""
|
||||
# Preprocess images
|
||||
google_tensor, yandex_tensor = self.preprocess_images(google_img, yandex_img)
|
||||
|
||||
# Move to device
|
||||
google_tensor = google_tensor.to(self.device)
|
||||
yandex_tensor = yandex_tensor.to(self.device)
|
||||
|
||||
# Perform inference
|
||||
with torch.no_grad():
|
||||
homography = self.model(
|
||||
google_tensor, yandex_tensor, return_matrix=return_matrix
|
||||
)
|
||||
|
||||
if return_matrix and normalize:
|
||||
# Normalize so that last element is 1
|
||||
homography = homography / homography[:, 2, 2].view(-1, 1, 1)
|
||||
|
||||
return homography.squeeze(0) # Remove batch dimension
|
||||
|
||||
def predict_from_paths(
|
||||
self,
|
||||
google_path: str,
|
||||
yandex_path: str,
|
||||
return_matrix: bool = True,
|
||||
normalize: bool = True,
|
||||
) -> torch.Tensor:
|
||||
"""
|
||||
Predict homography matrix from image file paths.
|
||||
|
||||
Args:
|
||||
google_path: Path to Google map image
|
||||
yandex_path: Path to Yandex map image
|
||||
return_matrix: If True, return 3x3 matrix; if False, return flattened vector
|
||||
normalize: Whether to normalize the homography matrix
|
||||
|
||||
Returns:
|
||||
Predicted homography matrix or vector
|
||||
"""
|
||||
# Load images
|
||||
google_img = Image.open(google_path)
|
||||
yandex_img = Image.open(yandex_path)
|
||||
|
||||
return self.predict(google_img, yandex_img, return_matrix, normalize)
|
||||
|
||||
def warp_image(
|
||||
self,
|
||||
img: Image.Image,
|
||||
homography: np.ndarray,
|
||||
output_size: Optional[Tuple[int, int]] = None,
|
||||
) -> Image.Image:
|
||||
"""
|
||||
Warp image using homography matrix.
|
||||
|
||||
Args:
|
||||
img: Input image (PIL Image)
|
||||
homography: 3x3 homography matrix (numpy array)
|
||||
output_size: Output image size (width, height). If None, uses input size.
|
||||
|
||||
Returns:
|
||||
Warped image (PIL Image)
|
||||
"""
|
||||
# Convert to numpy array
|
||||
img_np = np.array(img)
|
||||
|
||||
# Get output size
|
||||
if output_size is None:
|
||||
output_size = (img_np.shape[1], img_np.shape[0])
|
||||
|
||||
# Apply homography transformation
|
||||
warped_np = cv2.warpPerspective(
|
||||
img_np,
|
||||
homography,
|
||||
output_size,
|
||||
flags=cv2.INTER_LINEAR,
|
||||
borderMode=cv2.BORDER_REFLECT,
|
||||
)
|
||||
|
||||
# Convert back to PIL Image
|
||||
return Image.fromarray(warped_np)
|
||||
|
||||
def visualize_alignment(
|
||||
self,
|
||||
google_img: Image.Image,
|
||||
yandex_img: Image.Image,
|
||||
homography: np.ndarray,
|
||||
save_path: Optional[str] = None,
|
||||
show: bool = True,
|
||||
):
|
||||
"""
|
||||
Visualize alignment between images using homography.
|
||||
|
||||
Args:
|
||||
google_img: Google map image
|
||||
yandex_img: Yandex map image
|
||||
homography: Homography matrix
|
||||
save_path: Path to save visualization (optional)
|
||||
show: Whether to display the visualization
|
||||
"""
|
||||
# Warp yandex image to align with google
|
||||
yandex_warped = self.warp_image(yandex_img, homography)
|
||||
|
||||
# Convert images to numpy arrays for visualization
|
||||
google_np = np.array(google_img)
|
||||
yandex_np = np.array(yandex_img)
|
||||
yandex_warped_np = np.array(yandex_warped)
|
||||
|
||||
# Create visualization
|
||||
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
||||
|
||||
# Original images
|
||||
axes[0, 0].imshow(google_np)
|
||||
axes[0, 0].set_title("Google Map (Original)")
|
||||
axes[0, 0].axis("off")
|
||||
|
||||
axes[0, 1].imshow(yandex_np)
|
||||
axes[0, 1].set_title("Yandex Map (Original)")
|
||||
axes[0, 1].axis("off")
|
||||
|
||||
# Warped image
|
||||
axes[1, 0].imshow(yandex_warped_np)
|
||||
axes[1, 0].set_title("Yandex Map (Warped)")
|
||||
axes[1, 0].axis("off")
|
||||
|
||||
# Overlay (50% transparency)
|
||||
overlay = cv2.addWeighted(google_np, 0.5, yandex_warped_np, 0.5, 0)
|
||||
axes[1, 1].imshow(overlay)
|
||||
axes[1, 1].set_title("Overlay (Google + Warped Yandex)")
|
||||
axes[1, 1].axis("off")
|
||||
|
||||
plt.tight_layout()
|
||||
|
||||
if save_path:
|
||||
plt.savefig(save_path, dpi=150, bbox_inches="tight")
|
||||
print(f"Visualization saved to {save_path}")
|
||||
|
||||
if show:
|
||||
plt.show()
|
||||
else:
|
||||
plt.close()
|
||||
|
||||
def evaluate_on_dataset(
|
||||
self,
|
||||
dataset_dir: str,
|
||||
num_samples: Optional[int] = None,
|
||||
save_dir: Optional[str] = None,
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Evaluate model on a dataset.
|
||||
|
||||
Args:
|
||||
dataset_dir: Directory containing image pairs
|
||||
num_samples: Number of samples to evaluate (None for all)
|
||||
save_dir: Directory to save visualizations (optional)
|
||||
|
||||
Returns:
|
||||
Dictionary of evaluation metrics
|
||||
"""
|
||||
# Create dataset
|
||||
dataset = HomographyDataset(
|
||||
root_dir=dataset_dir,
|
||||
transform=None, # We'll handle transforms manually
|
||||
augment=False,
|
||||
image_size=tuple(self.config.get("image_size", [256, 256])),
|
||||
cache_homographies=False,
|
||||
)
|
||||
|
||||
if num_samples is not None:
|
||||
indices = list(range(min(num_samples, len(dataset))))
|
||||
else:
|
||||
indices = list(range(len(dataset)))
|
||||
|
||||
errors = []
|
||||
corner_errors = []
|
||||
|
||||
print(f"Evaluating on {len(indices)} samples...")
|
||||
|
||||
for idx in indices:
|
||||
# Get sample without augmentation
|
||||
sample = dataset.get_sample_without_augmentation(idx)
|
||||
|
||||
google_img = sample["google_img"]
|
||||
yandex_img = sample["yandex_img"]
|
||||
target_homography = sample["homography"]
|
||||
|
||||
# Predict homography
|
||||
pred_homography = self.predict(
|
||||
google_img, yandex_img, return_matrix=True, normalize=True
|
||||
)
|
||||
|
||||
# Convert to numpy
|
||||
pred_homography_np = pred_homography.cpu().numpy()
|
||||
target_homography_np = target_homography
|
||||
|
||||
# Compute matrix error
|
||||
matrix_error = np.mean((pred_homography_np - target_homography_np) ** 2)
|
||||
errors.append(matrix_error)
|
||||
|
||||
# Compute corner error
|
||||
corners = np.array(
|
||||
[
|
||||
[-1, -1, 1],
|
||||
[1, -1, 1],
|
||||
[1, 1, 1],
|
||||
[-1, 1, 1],
|
||||
],
|
||||
dtype=np.float32,
|
||||
).T
|
||||
|
||||
pred_corners = pred_homography_np @ corners
|
||||
pred_corners = pred_corners / (pred_corners[2:3, :] + 1e-8)
|
||||
|
||||
target_corners = target_homography_np @ corners
|
||||
target_corners = target_corners / (target_corners[2:3, :] + 1e-8)
|
||||
|
||||
corner_error = np.mean(
|
||||
np.linalg.norm(pred_corners[:2, :] - target_corners[:2, :], axis=0)
|
||||
)
|
||||
corner_errors.append(corner_error)
|
||||
|
||||
# Save visualization if requested
|
||||
if save_dir:
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
vis_path = os.path.join(save_dir, f"sample_{idx:04d}.png")
|
||||
self.visualize_alignment(
|
||||
google_img,
|
||||
yandex_img,
|
||||
pred_homography_np,
|
||||
save_path=vis_path,
|
||||
show=False,
|
||||
)
|
||||
|
||||
# Compute metrics
|
||||
metrics = {
|
||||
"mean_matrix_error": float(np.mean(errors)),
|
||||
"std_matrix_error": float(np.std(errors)),
|
||||
"mean_corner_error": float(np.mean(corner_errors)),
|
||||
"std_corner_error": float(np.std(corner_errors)),
|
||||
"median_corner_error": float(np.median(corner_errors)),
|
||||
"max_corner_error": float(np.max(corner_errors)),
|
||||
"min_corner_error": float(np.min(corner_errors)),
|
||||
}
|
||||
|
||||
print("\nEvaluation Results:")
|
||||
for key, value in metrics.items():
|
||||
print(f" {key}: {value:.6f}")
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
def main():
|
||||
"""Main inference function."""
|
||||
parser = argparse.ArgumentParser(description="Inference for homography estimation")
|
||||
|
||||
# Model arguments
|
||||
parser.add_argument(
|
||||
"--model_path",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Path to trained model checkpoint",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--config_path",
|
||||
type=str,
|
||||
help="Path to model configuration file (optional)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--device",
|
||||
type=str,
|
||||
choices=["cuda", "cpu"],
|
||||
help="Device to run inference on",
|
||||
)
|
||||
|
||||
# Inference mode
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
type=str,
|
||||
default="single",
|
||||
choices=["single", "dataset", "batch"],
|
||||
help="Inference mode",
|
||||
)
|
||||
|
||||
# Single image mode
|
||||
parser.add_argument(
|
||||
"--google_path",
|
||||
type=str,
|
||||
help="Path to Google map image (single mode)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--yandex_path",
|
||||
type=str,
|
||||
help="Path to Yandex map image (single mode)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output_vis",
|
||||
type=str,
|
||||
help="Path to save visualization (single mode)",
|
||||
)
|
||||
|
||||
# Dataset mode
|
||||
parser.add_argument(
|
||||
"--dataset_dir",
|
||||
type=str,
|
||||
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
help="Directory containing image pairs (dataset mode)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_samples",
|
||||
type=int,
|
||||
help="Number of samples to evaluate (dataset mode)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save_vis_dir",
|
||||
type=str,
|
||||
help="Directory to save visualizations (dataset mode)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save_results",
|
||||
type=str,
|
||||
help="Path to save evaluation results (dataset mode)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Create inference object
|
||||
inference = HomographyInference(
|
||||
model_path=args.model_path,
|
||||
config_path=args.config_path,
|
||||
device=args.device,
|
||||
)
|
||||
|
||||
if args.mode == "single":
|
||||
# Single image pair inference
|
||||
if not args.google_path or not args.yandex_path:
|
||||
raise ValueError(
|
||||
"Both --google_path and --yandex_path are required for single mode"
|
||||
)
|
||||
|
||||
print(f"Processing single image pair:")
|
||||
print(f" Google: {args.google_path}")
|
||||
print(f" Yandex: {args.yandex_path}")
|
||||
|
||||
# Predict homography
|
||||
homography = inference.predict_from_paths(args.google_path, args.yandex_path)
|
||||
|
||||
print(f"\nPredicted homography matrix:")
|
||||
print(homography.cpu().numpy())
|
||||
|
||||
# Visualize alignment
|
||||
if args.output_vis:
|
||||
google_img = Image.open(args.google_path)
|
||||
yandex_img = Image.open(args.yandex_path)
|
||||
inference.visualize_alignment(
|
||||
google_img,
|
||||
yandex_img,
|
||||
homography.cpu().numpy(),
|
||||
save_path=args.output_vis,
|
||||
show=True,
|
||||
)
|
||||
|
||||
elif args.mode == "dataset":
|
||||
# Evaluate on dataset
|
||||
metrics = inference.evaluate_on_dataset(
|
||||
dataset_dir=args.dataset_dir,
|
||||
num_samples=args.num_samples,
|
||||
save_dir=args.save_vis_dir,
|
||||
)
|
||||
|
||||
# Save results if requested
|
||||
if args.save_results:
|
||||
with open(args.save_results, "w") as f:
|
||||
json.dump(metrics, f, indent=2)
|
||||
print(f"\nResults saved to {args.save_results}")
|
||||
|
||||
elif args.mode == "batch":
|
||||
# Batch processing (placeholder for future implementation)
|
||||
print("Batch mode not yet implemented")
|
||||
# Could implement processing multiple image pairs from a directory
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown mode: {args.mode}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
611
models/SiaN/train_homography.py
Normal file
611
models/SiaN/train_homography.py
Normal file
@@ -0,0 +1,611 @@
|
||||
"""
|
||||
Training script for homography estimation between Google and Yandex map images.
|
||||
|
||||
This script trains a CNN model to estimate homography matrices that align
|
||||
Google map images with Yandex map images.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from homography import HomographyDataset, create_data_loaders
|
||||
from homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class HomographyTrainer:
|
||||
"""Trainer class for homography estimation model."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: nn.Module,
|
||||
train_loader: DataLoader,
|
||||
val_loader: DataLoader,
|
||||
device: torch.device,
|
||||
config: Dict,
|
||||
):
|
||||
"""
|
||||
Initialize the trainer.
|
||||
|
||||
Args:
|
||||
model: Homography estimation model
|
||||
train_loader: Training data loader
|
||||
val_loader: Validation data loader
|
||||
device: Device to run training on
|
||||
config: Training configuration dictionary
|
||||
"""
|
||||
self.model = model.to(device)
|
||||
self.train_loader = train_loader
|
||||
self.val_loader = val_loader
|
||||
self.device = device
|
||||
self.config = config
|
||||
|
||||
# Loss function
|
||||
self.criterion = HomographyLoss(
|
||||
matrix_weight=config.get("matrix_weight", 1.0),
|
||||
geometric_weight=config.get("geometric_weight", 0.5),
|
||||
reg_weight=config.get("reg_weight", 0.1),
|
||||
grid_size=config.get("grid_size", 8),
|
||||
).to(device)
|
||||
|
||||
# Optimizer
|
||||
optimizer_name = config.get("optimizer", "adam").lower()
|
||||
lr = config.get("learning_rate", 1e-3)
|
||||
weight_decay = config.get("weight_decay", 1e-4)
|
||||
|
||||
if optimizer_name == "adam":
|
||||
self.optimizer = optim.Adam(
|
||||
self.model.parameters(), lr=lr, weight_decay=weight_decay
|
||||
)
|
||||
elif optimizer_name == "adamw":
|
||||
self.optimizer = optim.AdamW(
|
||||
self.model.parameters(), lr=lr, weight_decay=weight_decay
|
||||
)
|
||||
elif optimizer_name == "sgd":
|
||||
self.optimizer = optim.SGD(
|
||||
self.model.parameters(),
|
||||
lr=lr,
|
||||
momentum=config.get("momentum", 0.9),
|
||||
weight_decay=weight_decay,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown optimizer: {optimizer_name}")
|
||||
|
||||
# Learning rate scheduler
|
||||
scheduler_name = config.get("scheduler", "plateau").lower()
|
||||
if scheduler_name == "plateau":
|
||||
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
|
||||
self.optimizer,
|
||||
mode="min",
|
||||
factor=config.get("scheduler_factor", 0.5),
|
||||
patience=config.get("scheduler_patience", 5),
|
||||
verbose=True,
|
||||
)
|
||||
elif scheduler_name == "cosine":
|
||||
self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
||||
self.optimizer,
|
||||
T_max=config.get("epochs", 100),
|
||||
eta_min=config.get("min_lr", 1e-6),
|
||||
)
|
||||
elif scheduler_name == "step":
|
||||
self.scheduler = optim.lr_scheduler.StepLR(
|
||||
self.optimizer,
|
||||
step_size=config.get("step_size", 30),
|
||||
gamma=config.get("gamma", 0.1),
|
||||
)
|
||||
else:
|
||||
self.scheduler = None
|
||||
|
||||
# Training state
|
||||
self.current_epoch = 0
|
||||
self.best_val_loss = float("inf")
|
||||
self.train_losses: List[float] = []
|
||||
self.val_losses: List[float] = []
|
||||
self.val_metrics: List[Dict] = []
|
||||
|
||||
# Create output directory
|
||||
self.output_dir = Path(config.get("output_dir", "runs/homography"))
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# TensorBoard writer
|
||||
self.writer = SummaryWriter(log_dir=self.output_dir / "tensorboard")
|
||||
|
||||
# Save configuration
|
||||
config_path = self.output_dir / "config.json"
|
||||
with open(config_path, "w") as f:
|
||||
json.dump(config, f, indent=2)
|
||||
|
||||
print(f"Training configuration saved to {config_path}")
|
||||
print(
|
||||
f"Model has {sum(p.numel() for p in self.model.parameters()):,} parameters"
|
||||
)
|
||||
|
||||
def train_epoch(self) -> float:
|
||||
"""
|
||||
Train for one epoch.
|
||||
|
||||
Returns:
|
||||
Average training loss for the epoch
|
||||
"""
|
||||
self.model.train()
|
||||
total_loss = 0.0
|
||||
num_batches = len(self.train_loader)
|
||||
|
||||
progress_bar = tqdm(self.train_loader, desc=f"Epoch {self.current_epoch + 1}")
|
||||
for batch_idx, batch in enumerate(progress_bar):
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
self.optimizer.zero_grad()
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute loss
|
||||
loss = self.criterion(
|
||||
pred_homography,
|
||||
target_homography,
|
||||
google_img,
|
||||
yandex_img,
|
||||
)
|
||||
|
||||
# Backward pass
|
||||
loss.backward()
|
||||
|
||||
# Gradient clipping
|
||||
if self.config.get("grad_clip", 1.0) > 0:
|
||||
torch.nn.utils.clip_grad_norm_(
|
||||
self.model.parameters(),
|
||||
self.config.get("grad_clip", 1.0),
|
||||
)
|
||||
|
||||
# Optimizer step
|
||||
self.optimizer.step()
|
||||
|
||||
# Update statistics
|
||||
total_loss += loss.item()
|
||||
|
||||
# Update progress bar
|
||||
progress_bar.set_postfix({"loss": loss.item()})
|
||||
|
||||
# Log batch loss to TensorBoard
|
||||
global_step = self.current_epoch * num_batches + batch_idx
|
||||
self.writer.add_scalar("train/batch_loss", loss.item(), global_step)
|
||||
|
||||
avg_loss = total_loss / num_batches
|
||||
self.train_losses.append(avg_loss)
|
||||
|
||||
return avg_loss
|
||||
|
||||
@torch.no_grad()
|
||||
def validate(self) -> Tuple[float, Dict]:
|
||||
"""
|
||||
Validate the model.
|
||||
|
||||
Returns:
|
||||
Tuple of (average validation loss, validation metrics)
|
||||
"""
|
||||
self.model.eval()
|
||||
total_loss = 0.0
|
||||
all_metrics = []
|
||||
|
||||
progress_bar = tqdm(self.val_loader, desc="Validation")
|
||||
for batch in progress_bar:
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute loss
|
||||
loss = self.criterion(
|
||||
pred_homography,
|
||||
target_homography,
|
||||
google_img,
|
||||
yandex_img,
|
||||
)
|
||||
|
||||
# Compute metrics
|
||||
metrics = self.criterion.compute_metrics(pred_homography, target_homography)
|
||||
|
||||
# Update statistics
|
||||
total_loss += loss.item()
|
||||
all_metrics.append(metrics)
|
||||
|
||||
# Update progress bar
|
||||
progress_bar.set_postfix({"loss": loss.item()})
|
||||
|
||||
avg_loss = total_loss / len(self.val_loader)
|
||||
self.val_losses.append(avg_loss)
|
||||
|
||||
# Aggregate metrics
|
||||
avg_metrics = {}
|
||||
for key in all_metrics[0].keys():
|
||||
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
|
||||
|
||||
self.val_metrics.append(avg_metrics)
|
||||
|
||||
return avg_loss, avg_metrics
|
||||
|
||||
def save_checkpoint(self, is_best: bool = False):
|
||||
"""Save model checkpoint."""
|
||||
checkpoint = {
|
||||
"epoch": self.current_epoch,
|
||||
"model_state_dict": self.model.state_dict(),
|
||||
"optimizer_state_dict": self.optimizer.state_dict(),
|
||||
"train_losses": self.train_losses,
|
||||
"val_losses": self.val_losses,
|
||||
"val_metrics": self.val_metrics,
|
||||
"best_val_loss": self.best_val_loss,
|
||||
"config": self.config,
|
||||
}
|
||||
|
||||
if self.scheduler is not None:
|
||||
checkpoint["scheduler_state_dict"] = self.scheduler.state_dict()
|
||||
|
||||
# Save latest checkpoint
|
||||
checkpoint_path = self.output_dir / "checkpoint_latest.pth"
|
||||
torch.save(checkpoint, checkpoint_path)
|
||||
|
||||
# Save best checkpoint
|
||||
if is_best:
|
||||
best_path = self.output_dir / "checkpoint_best.pth"
|
||||
torch.save(checkpoint, best_path)
|
||||
print(f"Best model saved to {best_path}")
|
||||
|
||||
def load_checkpoint(self, checkpoint_path: str):
|
||||
"""Load model checkpoint."""
|
||||
checkpoint = torch.load(checkpoint_path, map_location=self.device)
|
||||
|
||||
self.model.load_state_dict(checkpoint["model_state_dict"])
|
||||
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
|
||||
|
||||
if self.scheduler is not None and "scheduler_state_dict" in checkpoint:
|
||||
self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
|
||||
|
||||
self.current_epoch = checkpoint["epoch"]
|
||||
self.train_losses = checkpoint["train_losses"]
|
||||
self.val_losses = checkpoint["val_losses"]
|
||||
self.val_metrics = checkpoint["val_metrics"]
|
||||
self.best_val_loss = checkpoint["best_val_loss"]
|
||||
|
||||
print(f"Loaded checkpoint from epoch {self.current_epoch}")
|
||||
|
||||
def train(self, num_epochs: int):
|
||||
"""
|
||||
Train the model for specified number of epochs.
|
||||
|
||||
Args:
|
||||
num_epochs: Number of epochs to train
|
||||
"""
|
||||
print(f"Starting training for {num_epochs} epochs...")
|
||||
start_time = time.time()
|
||||
|
||||
for epoch in range(num_epochs):
|
||||
self.current_epoch = epoch
|
||||
|
||||
# Train for one epoch
|
||||
train_loss = self.train_epoch()
|
||||
|
||||
# Validate
|
||||
val_loss, val_metrics = self.validate()
|
||||
|
||||
# Update learning rate scheduler
|
||||
if self.scheduler is not None:
|
||||
if isinstance(self.scheduler, optim.lr_scheduler.ReduceLROnPlateau):
|
||||
self.scheduler.step(val_loss)
|
||||
else:
|
||||
self.scheduler.step()
|
||||
|
||||
# Log to TensorBoard
|
||||
self.writer.add_scalar("train/epoch_loss", train_loss, epoch)
|
||||
self.writer.add_scalar("val/epoch_loss", val_loss, epoch)
|
||||
for metric_name, metric_value in val_metrics.items():
|
||||
self.writer.add_scalar(f"val/{metric_name}", metric_value, epoch)
|
||||
|
||||
# Print epoch summary
|
||||
print(f"\nEpoch {epoch + 1}/{num_epochs}:")
|
||||
print(f" Train Loss: {train_loss:.6f}")
|
||||
print(f" Val Loss: {val_loss:.6f}")
|
||||
print(" Val Metrics:")
|
||||
for metric_name, metric_value in val_metrics.items():
|
||||
print(f" {metric_name}: {metric_value:.6f}")
|
||||
|
||||
# Save checkpoint
|
||||
is_best = val_loss < self.best_val_loss
|
||||
if is_best:
|
||||
self.best_val_loss = val_loss
|
||||
|
||||
self.save_checkpoint(is_best=is_best)
|
||||
|
||||
# Early stopping
|
||||
if self.config.get("early_stopping_patience", 0) > 0:
|
||||
if (
|
||||
epoch - np.argmin(self.val_losses)
|
||||
>= self.config["early_stopping_patience"]
|
||||
):
|
||||
print(f"Early stopping at epoch {epoch + 1}")
|
||||
break
|
||||
|
||||
# Training completed
|
||||
training_time = time.time() - start_time
|
||||
print(f"\nTraining completed in {training_time:.2f} seconds")
|
||||
print(f"Best validation loss: {self.best_val_loss:.6f}")
|
||||
|
||||
# Save final model
|
||||
final_model_path = self.output_dir / "model_final.pth"
|
||||
torch.save(self.model.state_dict(), final_model_path)
|
||||
print(f"Final model saved to {final_model_path}")
|
||||
|
||||
# Close TensorBoard writer
|
||||
self.writer.close()
|
||||
|
||||
def evaluate(self, test_loader: Optional[DataLoader] = None) -> Dict:
|
||||
"""
|
||||
Evaluate the model on test data.
|
||||
|
||||
Args:
|
||||
test_loader: Test data loader (uses validation loader if None)
|
||||
|
||||
Returns:
|
||||
Dictionary of evaluation metrics
|
||||
"""
|
||||
if test_loader is None:
|
||||
test_loader = self.val_loader
|
||||
|
||||
self.model.eval()
|
||||
all_metrics = []
|
||||
|
||||
print("Evaluating model...")
|
||||
with torch.no_grad():
|
||||
for batch in tqdm(test_loader):
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute metrics
|
||||
metrics = self.criterion.compute_metrics(
|
||||
pred_homography, target_homography
|
||||
)
|
||||
all_metrics.append(metrics)
|
||||
|
||||
# Aggregate metrics
|
||||
avg_metrics = {}
|
||||
for key in all_metrics[0].keys():
|
||||
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
|
||||
|
||||
# Print evaluation results
|
||||
print("\nEvaluation Results:")
|
||||
for metric_name, metric_value in avg_metrics.items():
|
||||
print(f" {metric_name}: {metric_value:.6f}")
|
||||
|
||||
# Save evaluation results
|
||||
eval_path = self.output_dir / "evaluation_results.json"
|
||||
with open(eval_path, "w") as f:
|
||||
json.dump(avg_metrics, f, indent=2)
|
||||
print(f"Evaluation results saved to {eval_path}")
|
||||
|
||||
return avg_metrics
|
||||
|
||||
|
||||
def parse_args():
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser(description="Train homography estimation model")
|
||||
|
||||
# Data arguments
|
||||
parser.add_argument(
|
||||
"--data_dir",
|
||||
type=str,
|
||||
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
help="Directory containing image pairs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch_size", type=int, default=32, help="Batch size for training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image_size",
|
||||
type=int,
|
||||
nargs=2,
|
||||
default=[256, 256],
|
||||
help="Image size (height width)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--train_split", type=float, default=0.8, help="Train/validation split ratio"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_workers", type=int, default=4, help="Number of data loader workers"
|
||||
)
|
||||
|
||||
# Model arguments
|
||||
parser.add_argument(
|
||||
"--model_type", type=str, default="cnn", choices=["cnn"], help="Model type"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hidden_channels", type=int, default=64, help="Number of hidden channels"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_blocks", type=int, default=4, help="Number of convolutional blocks"
|
||||
)
|
||||
parser.add_argument("--dropout_rate", type=float, default=0.3, help="Dropout rate")
|
||||
parser.add_argument(
|
||||
"--use_batch_norm", action="store_true", help="Use batch normalization"
|
||||
)
|
||||
|
||||
# Training arguments
|
||||
parser.add_argument("--epochs", type=int, default=100, help="Number of epochs")
|
||||
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
|
||||
parser.add_argument("--weight_decay", type=float, default=1e-4, help="Weight decay")
|
||||
parser.add_argument(
|
||||
"--optimizer", type=str, default="adam", choices=["adam", "adamw", "sgd"]
|
||||
)
|
||||
parser.add_argument(
|
||||
"--scheduler",
|
||||
type=str,
|
||||
default="plateau",
|
||||
choices=["plateau", "cosine", "step", "none"],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--grad_clip", type=float, default=1.0, help="Gradient clipping value"
|
||||
)
|
||||
|
||||
# Loss arguments
|
||||
parser.add_argument(
|
||||
"--matrix_weight", type=float, default=1.0, help="Weight for matrix loss"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--geometric_weight",
|
||||
type=float,
|
||||
default=0.5,
|
||||
help="Weight for geometric loss",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--reg_weight", type=float, default=0.1, help="Weight for regularization loss"
|
||||
)
|
||||
|
||||
# Other arguments
|
||||
parser.add_argument(
|
||||
"--output_dir",
|
||||
type=str,
|
||||
default="runs/homography",
|
||||
help="Output directory for checkpoints and logs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
type=str,
|
||||
help="Path to checkpoint to resume training from",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eval_only",
|
||||
action="store_true",
|
||||
help="Only evaluate the model (no training)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--seed", type=int, default=42, help="Random seed for reproducibility"
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
"""Main training function."""
|
||||
args = parse_args()
|
||||
|
||||
# Set random seeds for reproducibility
|
||||
torch.manual_seed(args.seed)
|
||||
np.random.seed(args.seed)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.manual_seed(args.seed)
|
||||
torch.cuda.manual_seed_all(args.seed)
|
||||
|
||||
# Set device
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
print(f"Using device: {device}")
|
||||
|
||||
# Create data loaders
|
||||
print("Creating data loaders...")
|
||||
train_loader, val_loader = create_data_loaders(
|
||||
root_dir=args.data_dir,
|
||||
batch_size=args.batch_size,
|
||||
train_split=args.train_split,
|
||||
num_workers=args.num_workers,
|
||||
image_size=tuple(args.image_size),
|
||||
augment_train=True,
|
||||
augment_val=False,
|
||||
)
|
||||
|
||||
print(f"Train batches: {len(train_loader)}")
|
||||
print(f"Val batches: {len(val_loader)}")
|
||||
|
||||
# Create model
|
||||
print("Creating model...")
|
||||
model = create_homography_model(
|
||||
model_type=args.model_type,
|
||||
input_size=tuple(args.image_size),
|
||||
input_channels=3,
|
||||
hidden_channels=args.hidden_channels,
|
||||
num_blocks=args.num_blocks,
|
||||
dropout_rate=args.dropout_rate,
|
||||
use_batch_norm=args.use_batch_norm,
|
||||
)
|
||||
|
||||
# Create trainer configuration
|
||||
config = {
|
||||
# Model config
|
||||
"model_type": args.model_type,
|
||||
"hidden_channels": args.hidden_channels,
|
||||
"num_blocks": args.num_blocks,
|
||||
"dropout_rate": args.dropout_rate,
|
||||
"use_batch_norm": args.use_batch_norm,
|
||||
"image_size": args.image_size,
|
||||
# Training config
|
||||
"epochs": args.epochs,
|
||||
"batch_size": args.batch_size,
|
||||
"learning_rate": args.lr,
|
||||
"weight_decay": args.weight_decay,
|
||||
"optimizer": args.optimizer,
|
||||
"scheduler": args.scheduler,
|
||||
"grad_clip": args.grad_clip,
|
||||
# Loss config
|
||||
"matrix_weight": args.matrix_weight,
|
||||
"geometric_weight": args.geometric_weight,
|
||||
"reg_weight": args.reg_weight,
|
||||
"grid_size": 8,
|
||||
# Data config
|
||||
"data_dir": args.data_dir,
|
||||
"train_split": args.train_split,
|
||||
"num_workers": args.num_workers,
|
||||
# Output config
|
||||
"output_dir": args.output_dir,
|
||||
"seed": args.seed,
|
||||
}
|
||||
|
||||
# Create trainer
|
||||
trainer = HomographyTrainer(
|
||||
model=model,
|
||||
train_loader=train_loader,
|
||||
val_loader=val_loader,
|
||||
device=device,
|
||||
config=config,
|
||||
)
|
||||
|
||||
# Resume from checkpoint if specified
|
||||
if args.resume:
|
||||
print(f"Resuming from checkpoint: {args.resume}")
|
||||
trainer.load_checkpoint(args.resume)
|
||||
|
||||
# Evaluate only mode
|
||||
if args.eval_only:
|
||||
trainer.evaluate()
|
||||
return
|
||||
|
||||
# Train the model
|
||||
trainer.train(num_epochs=args.epochs)
|
||||
|
||||
# Final evaluation
|
||||
print("\nPerforming final evaluation...")
|
||||
trainer.evaluate()
|
||||
|
||||
print("\nTraining completed successfully!")
|
||||
print(f"Results saved to: {args.output_dir}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
611
models/SiaN/train_homography_.py
Normal file
611
models/SiaN/train_homography_.py
Normal file
@@ -0,0 +1,611 @@
|
||||
"""
|
||||
Training script for homography estimation between Google and Yandex map images.
|
||||
|
||||
This script trains a CNN model to estimate homography matrices that align
|
||||
Google map images with Yandex map images.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from homography import HomographyDataset, create_data_loaders
|
||||
from homography_cnn import HomographyCNN, HomographyLoss, create_homography_model
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class HomographyTrainer:
|
||||
"""Trainer class for homography estimation model."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: nn.Module,
|
||||
train_loader: DataLoader,
|
||||
val_loader: DataLoader,
|
||||
device: torch.device,
|
||||
config: Dict,
|
||||
):
|
||||
"""
|
||||
Initialize the trainer.
|
||||
|
||||
Args:
|
||||
model: Homography estimation model
|
||||
train_loader: Training data loader
|
||||
val_loader: Validation data loader
|
||||
device: Device to run training on
|
||||
config: Training configuration dictionary
|
||||
"""
|
||||
self.model = model.to(device)
|
||||
self.train_loader = train_loader
|
||||
self.val_loader = val_loader
|
||||
self.device = device
|
||||
self.config = config
|
||||
|
||||
# Loss function
|
||||
self.criterion = HomographyLoss(
|
||||
matrix_weight=config.get("matrix_weight", 1.0),
|
||||
geometric_weight=config.get("geometric_weight", 0.5),
|
||||
reg_weight=config.get("reg_weight", 0.1),
|
||||
grid_size=config.get("grid_size", 8),
|
||||
).to(device)
|
||||
|
||||
# Optimizer
|
||||
optimizer_name = config.get("optimizer", "adam").lower()
|
||||
lr = config.get("learning_rate", 1e-3)
|
||||
weight_decay = config.get("weight_decay", 1e-4)
|
||||
|
||||
if optimizer_name == "adam":
|
||||
self.optimizer = optim.Adam(
|
||||
self.model.parameters(), lr=lr, weight_decay=weight_decay
|
||||
)
|
||||
elif optimizer_name == "adamw":
|
||||
self.optimizer = optim.AdamW(
|
||||
self.model.parameters(), lr=lr, weight_decay=weight_decay
|
||||
)
|
||||
elif optimizer_name == "sgd":
|
||||
self.optimizer = optim.SGD(
|
||||
self.model.parameters(),
|
||||
lr=lr,
|
||||
momentum=config.get("momentum", 0.9),
|
||||
weight_decay=weight_decay,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown optimizer: {optimizer_name}")
|
||||
|
||||
# Learning rate scheduler
|
||||
scheduler_name = config.get("scheduler", "plateau").lower()
|
||||
if scheduler_name == "plateau":
|
||||
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
|
||||
self.optimizer,
|
||||
mode="min",
|
||||
factor=config.get("scheduler_factor", 0.5),
|
||||
patience=config.get("scheduler_patience", 5),
|
||||
verbose=True,
|
||||
)
|
||||
elif scheduler_name == "cosine":
|
||||
self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
||||
self.optimizer,
|
||||
T_max=config.get("epochs", 100),
|
||||
eta_min=config.get("min_lr", 1e-6),
|
||||
)
|
||||
elif scheduler_name == "step":
|
||||
self.scheduler = optim.lr_scheduler.StepLR(
|
||||
self.optimizer,
|
||||
step_size=config.get("step_size", 30),
|
||||
gamma=config.get("gamma", 0.1),
|
||||
)
|
||||
else:
|
||||
self.scheduler = None
|
||||
|
||||
# Training state
|
||||
self.current_epoch = 0
|
||||
self.best_val_loss = float("inf")
|
||||
self.train_losses: List[float] = []
|
||||
self.val_losses: List[float] = []
|
||||
self.val_metrics: List[Dict] = []
|
||||
|
||||
# Create output directory
|
||||
self.output_dir = Path(config.get("output_dir", "runs/homography"))
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# TensorBoard writer
|
||||
self.writer = SummaryWriter(log_dir=self.output_dir / "tensorboard")
|
||||
|
||||
# Save configuration
|
||||
config_path = self.output_dir / "config.json"
|
||||
with open(config_path, "w") as f:
|
||||
json.dump(config, f, indent=2)
|
||||
|
||||
print(f"Training configuration saved to {config_path}")
|
||||
print(
|
||||
f"Model has {sum(p.numel() for p in self.model.parameters()):,} parameters"
|
||||
)
|
||||
|
||||
def train_epoch(self) -> float:
|
||||
"""
|
||||
Train for one epoch.
|
||||
|
||||
Returns:
|
||||
Average training loss for the epoch
|
||||
"""
|
||||
self.model.train()
|
||||
total_loss = 0.0
|
||||
num_batches = len(self.train_loader)
|
||||
|
||||
progress_bar = tqdm(self.train_loader, desc=f"Epoch {self.current_epoch + 1}")
|
||||
for batch_idx, batch in enumerate(progress_bar):
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
self.optimizer.zero_grad()
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute loss
|
||||
loss = self.criterion(
|
||||
pred_homography,
|
||||
target_homography,
|
||||
google_img,
|
||||
yandex_img,
|
||||
)
|
||||
|
||||
# Backward pass
|
||||
loss.backward()
|
||||
|
||||
# Gradient clipping
|
||||
if self.config.get("grad_clip", 1.0) > 0:
|
||||
torch.nn.utils.clip_grad_norm_(
|
||||
self.model.parameters(),
|
||||
self.config.get("grad_clip", 1.0),
|
||||
)
|
||||
|
||||
# Optimizer step
|
||||
self.optimizer.step()
|
||||
|
||||
# Update statistics
|
||||
total_loss += loss.item()
|
||||
|
||||
# Update progress bar
|
||||
progress_bar.set_postfix({"loss": loss.item()})
|
||||
|
||||
# Log batch loss to TensorBoard
|
||||
global_step = self.current_epoch * num_batches + batch_idx
|
||||
self.writer.add_scalar("train/batch_loss", loss.item(), global_step)
|
||||
|
||||
avg_loss = total_loss / num_batches
|
||||
self.train_losses.append(avg_loss)
|
||||
|
||||
return avg_loss
|
||||
|
||||
@torch.no_grad()
|
||||
def validate(self) -> Tuple[float, Dict]:
|
||||
"""
|
||||
Validate the model.
|
||||
|
||||
Returns:
|
||||
Tuple of (average validation loss, validation metrics)
|
||||
"""
|
||||
self.model.eval()
|
||||
total_loss = 0.0
|
||||
all_metrics = []
|
||||
|
||||
progress_bar = tqdm(self.val_loader, desc="Validation")
|
||||
for batch in progress_bar:
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute loss
|
||||
loss = self.criterion(
|
||||
pred_homography,
|
||||
target_homography,
|
||||
google_img,
|
||||
yandex_img,
|
||||
)
|
||||
|
||||
# Compute metrics
|
||||
metrics = self.criterion.compute_metrics(pred_homography, target_homography)
|
||||
|
||||
# Update statistics
|
||||
total_loss += loss.item()
|
||||
all_metrics.append(metrics)
|
||||
|
||||
# Update progress bar
|
||||
progress_bar.set_postfix({"loss": loss.item()})
|
||||
|
||||
avg_loss = total_loss / len(self.val_loader)
|
||||
self.val_losses.append(avg_loss)
|
||||
|
||||
# Aggregate metrics
|
||||
avg_metrics = {}
|
||||
for key in all_metrics[0].keys():
|
||||
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
|
||||
|
||||
self.val_metrics.append(avg_metrics)
|
||||
|
||||
return avg_loss, avg_metrics
|
||||
|
||||
def save_checkpoint(self, is_best: bool = False):
|
||||
"""Save model checkpoint."""
|
||||
checkpoint = {
|
||||
"epoch": self.current_epoch,
|
||||
"model_state_dict": self.model.state_dict(),
|
||||
"optimizer_state_dict": self.optimizer.state_dict(),
|
||||
"train_losses": self.train_losses,
|
||||
"val_losses": self.val_losses,
|
||||
"val_metrics": self.val_metrics,
|
||||
"best_val_loss": self.best_val_loss,
|
||||
"config": self.config,
|
||||
}
|
||||
|
||||
if self.scheduler is not None:
|
||||
checkpoint["scheduler_state_dict"] = self.scheduler.state_dict()
|
||||
|
||||
# Save latest checkpoint
|
||||
checkpoint_path = self.output_dir / "checkpoint_latest.pth"
|
||||
torch.save(checkpoint, checkpoint_path)
|
||||
|
||||
# Save best checkpoint
|
||||
if is_best:
|
||||
best_path = self.output_dir / "checkpoint_best.pth"
|
||||
torch.save(checkpoint, best_path)
|
||||
print(f"Best model saved to {best_path}")
|
||||
|
||||
def load_checkpoint(self, checkpoint_path: str):
|
||||
"""Load model checkpoint."""
|
||||
checkpoint = torch.load(checkpoint_path, map_location=self.device)
|
||||
|
||||
self.model.load_state_dict(checkpoint["model_state_dict"])
|
||||
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
|
||||
|
||||
if self.scheduler is not None and "scheduler_state_dict" in checkpoint:
|
||||
self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
|
||||
|
||||
self.current_epoch = checkpoint["epoch"]
|
||||
self.train_losses = checkpoint["train_losses"]
|
||||
self.val_losses = checkpoint["val_losses"]
|
||||
self.val_metrics = checkpoint["val_metrics"]
|
||||
self.best_val_loss = checkpoint["best_val_loss"]
|
||||
|
||||
print(f"Loaded checkpoint from epoch {self.current_epoch}")
|
||||
|
||||
def train(self, num_epochs: int):
|
||||
"""
|
||||
Train the model for specified number of epochs.
|
||||
|
||||
Args:
|
||||
num_epochs: Number of epochs to train
|
||||
"""
|
||||
print(f"Starting training for {num_epochs} epochs...")
|
||||
start_time = time.time()
|
||||
|
||||
for epoch in range(num_epochs):
|
||||
self.current_epoch = epoch
|
||||
|
||||
# Train for one epoch
|
||||
train_loss = self.train_epoch()
|
||||
|
||||
# Validate
|
||||
val_loss, val_metrics = self.validate()
|
||||
|
||||
# Update learning rate scheduler
|
||||
if self.scheduler is not None:
|
||||
if isinstance(self.scheduler, optim.lr_scheduler.ReduceLROnPlateau):
|
||||
self.scheduler.step(val_loss)
|
||||
else:
|
||||
self.scheduler.step()
|
||||
|
||||
# Log to TensorBoard
|
||||
self.writer.add_scalar("train/epoch_loss", train_loss, epoch)
|
||||
self.writer.add_scalar("val/epoch_loss", val_loss, epoch)
|
||||
for metric_name, metric_value in val_metrics.items():
|
||||
self.writer.add_scalar(f"val/{metric_name}", metric_value, epoch)
|
||||
|
||||
# Print epoch summary
|
||||
print(f"\nEpoch {epoch + 1}/{num_epochs}:")
|
||||
print(f" Train Loss: {train_loss:.6f}")
|
||||
print(f" Val Loss: {val_loss:.6f}")
|
||||
print(" Val Metrics:")
|
||||
for metric_name, metric_value in val_metrics.items():
|
||||
print(f" {metric_name}: {metric_value:.6f}")
|
||||
|
||||
# Save checkpoint
|
||||
is_best = val_loss < self.best_val_loss
|
||||
if is_best:
|
||||
self.best_val_loss = val_loss
|
||||
|
||||
self.save_checkpoint(is_best=is_best)
|
||||
|
||||
# Early stopping
|
||||
if self.config.get("early_stopping_patience", 0) > 0:
|
||||
if (
|
||||
epoch - np.argmin(self.val_losses)
|
||||
>= self.config["early_stopping_patience"]
|
||||
):
|
||||
print(f"Early stopping at epoch {epoch + 1}")
|
||||
break
|
||||
|
||||
# Training completed
|
||||
training_time = time.time() - start_time
|
||||
print(f"\nTraining completed in {training_time:.2f} seconds")
|
||||
print(f"Best validation loss: {self.best_val_loss:.6f}")
|
||||
|
||||
# Save final model
|
||||
final_model_path = self.output_dir / "model_final.pth"
|
||||
torch.save(self.model.state_dict(), final_model_path)
|
||||
print(f"Final model saved to {final_model_path}")
|
||||
|
||||
# Close TensorBoard writer
|
||||
self.writer.close()
|
||||
|
||||
def evaluate(self, test_loader: Optional[DataLoader] = None) -> Dict:
|
||||
"""
|
||||
Evaluate the model on test data.
|
||||
|
||||
Args:
|
||||
test_loader: Test data loader (uses validation loader if None)
|
||||
|
||||
Returns:
|
||||
Dictionary of evaluation metrics
|
||||
"""
|
||||
if test_loader is None:
|
||||
test_loader = self.val_loader
|
||||
|
||||
self.model.eval()
|
||||
all_metrics = []
|
||||
|
||||
print("Evaluating model...")
|
||||
with torch.no_grad():
|
||||
for batch in tqdm(test_loader):
|
||||
# Move data to device
|
||||
google_img = batch["google_img"].to(self.device)
|
||||
yandex_img = batch["yandex_img"].to(self.device)
|
||||
target_homography = batch["homography"].to(self.device)
|
||||
|
||||
# Forward pass
|
||||
pred_homography = self.model(google_img, yandex_img, return_matrix=True)
|
||||
|
||||
# Compute metrics
|
||||
metrics = self.criterion.compute_metrics(
|
||||
pred_homography, target_homography
|
||||
)
|
||||
all_metrics.append(metrics)
|
||||
|
||||
# Aggregate metrics
|
||||
avg_metrics = {}
|
||||
for key in all_metrics[0].keys():
|
||||
avg_metrics[key] = np.mean([m[key] for m in all_metrics])
|
||||
|
||||
# Print evaluation results
|
||||
print("\nEvaluation Results:")
|
||||
for metric_name, metric_value in avg_metrics.items():
|
||||
print(f" {metric_name}: {metric_value:.6f}")
|
||||
|
||||
# Save evaluation results
|
||||
eval_path = self.output_dir / "evaluation_results.json"
|
||||
with open(eval_path, "w") as f:
|
||||
json.dump(avg_metrics, f, indent=2)
|
||||
print(f"Evaluation results saved to {eval_path}")
|
||||
|
||||
return avg_metrics
|
||||
|
||||
|
||||
def parse_args():
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser(description="Train homography estimation model")
|
||||
|
||||
# Data arguments
|
||||
parser.add_argument(
|
||||
"--data_dir",
|
||||
type=str,
|
||||
default=r"C:\Users\admin\Projects\autopilot\datasets\ya_go_maps\images",
|
||||
help="Directory containing image pairs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch_size", type=int, default=32, help="Batch size for training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image_size",
|
||||
type=int,
|
||||
nargs=2,
|
||||
default=[256, 256],
|
||||
help="Image size (height width)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--train_split", type=float, default=0.8, help="Train/validation split ratio"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_workers", type=int, default=4, help="Number of data loader workers"
|
||||
)
|
||||
|
||||
# Model arguments
|
||||
parser.add_argument(
|
||||
"--model_type", type=str, default="cnn", choices=["cnn"], help="Model type"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hidden_channels", type=int, default=64, help="Number of hidden channels"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_blocks", type=int, default=4, help="Number of convolutional blocks"
|
||||
)
|
||||
parser.add_argument("--dropout_rate", type=float, default=0.3, help="Dropout rate")
|
||||
parser.add_argument(
|
||||
"--use_batch_norm", action="store_true", help="Use batch normalization"
|
||||
)
|
||||
|
||||
# Training arguments
|
||||
parser.add_argument("--epochs", type=int, default=100, help="Number of epochs")
|
||||
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate")
|
||||
parser.add_argument("--weight_decay", type=float, default=1e-4, help="Weight decay")
|
||||
parser.add_argument(
|
||||
"--optimizer", type=str, default="adam", choices=["adam", "adamw", "sgd"]
|
||||
)
|
||||
parser.add_argument(
|
||||
"--scheduler",
|
||||
type=str,
|
||||
default="plateau",
|
||||
choices=["plateau", "cosine", "step", "none"],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--grad_clip", type=float, default=1.0, help="Gradient clipping value"
|
||||
)
|
||||
|
||||
# Loss arguments
|
||||
parser.add_argument(
|
||||
"--matrix_weight", type=float, default=1.0, help="Weight for matrix loss"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--geometric_weight",
|
||||
type=float,
|
||||
default=0.5,
|
||||
help="Weight for geometric loss",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--reg_weight", type=float, default=0.1, help="Weight for regularization loss"
|
||||
)
|
||||
|
||||
# Other arguments
|
||||
parser.add_argument(
|
||||
"--output_dir",
|
||||
type=str,
|
||||
default="runs/homography",
|
||||
help="Output directory for checkpoints and logs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
type=str,
|
||||
help="Path to checkpoint to resume training from",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eval_only",
|
||||
action="store_true",
|
||||
help="Only evaluate the model (no training)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--seed", type=int, default=42, help="Random seed for reproducibility"
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
"""Main training function."""
|
||||
args = parse_args()
|
||||
|
||||
# Set random seeds for reproducibility
|
||||
torch.manual_seed(args.seed)
|
||||
np.random.seed(args.seed)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.manual_seed(args.seed)
|
||||
torch.cuda.manual_seed_all(args.seed)
|
||||
|
||||
# Set device
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
print(f"Using device: {device}")
|
||||
|
||||
# Create data loaders
|
||||
print("Creating data loaders...")
|
||||
train_loader, val_loader = create_data_loaders(
|
||||
root_dir=args.data_dir,
|
||||
batch_size=args.batch_size,
|
||||
train_split=args.train_split,
|
||||
num_workers=args.num_workers,
|
||||
image_size=tuple(args.image_size),
|
||||
augment_train=True,
|
||||
augment_val=False,
|
||||
)
|
||||
|
||||
print(f"Train batches: {len(train_loader)}")
|
||||
print(f"Val batches: {len(val_loader)}")
|
||||
|
||||
# Create model
|
||||
print("Creating model...")
|
||||
model = create_homography_model(
|
||||
model_type=args.model_type,
|
||||
input_size=tuple(args.image_size),
|
||||
input_channels=3,
|
||||
hidden_channels=args.hidden_channels,
|
||||
num_blocks=args.num_blocks,
|
||||
dropout_rate=args.dropout_rate,
|
||||
use_batch_norm=args.use_batch_norm,
|
||||
)
|
||||
|
||||
# Create trainer configuration
|
||||
config = {
|
||||
# Model config
|
||||
"model_type": args.model_type,
|
||||
"hidden_channels": args.hidden_channels,
|
||||
"num_blocks": args.num_blocks,
|
||||
"dropout_rate": args.dropout_rate,
|
||||
"use_batch_norm": args.use_batch_norm,
|
||||
"image_size": args.image_size,
|
||||
# Training config
|
||||
"epochs": args.epochs,
|
||||
"batch_size": args.batch_size,
|
||||
"learning_rate": args.lr,
|
||||
"weight_decay": args.weight_decay,
|
||||
"optimizer": args.optimizer,
|
||||
"scheduler": args.scheduler,
|
||||
"grad_clip": args.grad_clip,
|
||||
# Loss config
|
||||
"matrix_weight": args.matrix_weight,
|
||||
"geometric_weight": args.geometric_weight,
|
||||
"reg_weight": args.reg_weight,
|
||||
"grid_size": 8,
|
||||
# Data config
|
||||
"data_dir": args.data_dir,
|
||||
"train_split": args.train_split,
|
||||
"num_workers": args.num_workers,
|
||||
# Output config
|
||||
"output_dir": args.output_dir,
|
||||
"seed": args.seed,
|
||||
}
|
||||
|
||||
# Create trainer
|
||||
trainer = HomographyTrainer(
|
||||
model=model,
|
||||
train_loader=train_loader,
|
||||
val_loader=val_loader,
|
||||
device=device,
|
||||
config=config,
|
||||
)
|
||||
|
||||
# Resume from checkpoint if specified
|
||||
if args.resume:
|
||||
print(f"Resuming from checkpoint: {args.resume}")
|
||||
trainer.load_checkpoint(args.resume)
|
||||
|
||||
# Evaluate only mode
|
||||
if args.eval_only:
|
||||
trainer.evaluate()
|
||||
return
|
||||
|
||||
# Train the model
|
||||
trainer.train(num_epochs=args.epochs)
|
||||
|
||||
# Final evaluation
|
||||
print("\nPerforming final evaluation...")
|
||||
trainer.evaluate()
|
||||
|
||||
print("\nTraining completed successfully!")
|
||||
print(f"Results saved to: {args.output_dir}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,418 +0,0 @@
|
||||
"""
|
||||
Расширенная система автопилота с улучшенным отслеживанием координат и угла БПЛА
|
||||
"""
|
||||
|
||||
import random
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import math
|
||||
from typing import Optional, Tuple, Dict, List
|
||||
from dataclasses import dataclass
|
||||
import time
|
||||
|
||||
random.seed(1)
|
||||
|
||||
@dataclass
|
||||
class DroneState:
|
||||
"""Структура для хранения состояния БПЛА"""
|
||||
x: float = 0.0
|
||||
y: float = 0.0
|
||||
altitude: float = 10.0
|
||||
angle: float = 0.0
|
||||
velocity_x: float = 0.0
|
||||
velocity_y: float = 0.0
|
||||
angular_velocity: float = 0.0
|
||||
timestamp: float = 0.0
|
||||
confidence: float = 1.0
|
||||
|
||||
class AdvancedAutoPilot:
|
||||
"""
|
||||
Расширенный автопилот с улучшенным отслеживанием координат и угла БПЛА
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
initial_x: float = 0.0,
|
||||
initial_y: float = 0.0,
|
||||
initial_altitude: float = 10.0,
|
||||
camera_fov: float = 60.0,
|
||||
image_width: int = 640,
|
||||
image_height: int = 480):
|
||||
|
||||
# Состояние БПЛА
|
||||
self.state = DroneState(
|
||||
x=initial_x,
|
||||
y=initial_y,
|
||||
altitude=initial_altitude,
|
||||
timestamp=time.time()
|
||||
)
|
||||
|
||||
# Параметры камеры
|
||||
self.camera_fov = math.radians(camera_fov)
|
||||
self.image_width = image_width
|
||||
self.image_height = image_height
|
||||
|
||||
# Вычисляем коэффициент перевода пикселей в метры
|
||||
self.update_pixel_to_meter_ratio()
|
||||
|
||||
# История состояний для фильтрации
|
||||
self.state_history: List[DroneState] = []
|
||||
self.max_history_size = 10
|
||||
|
||||
# Параметры фильтрации
|
||||
self.velocity_filter_alpha = 0.3 # Коэффициент сглаживания скорости
|
||||
self.position_filter_alpha = 0.7 # Коэффициент сглаживания позиции
|
||||
|
||||
# Инициализация детекторов
|
||||
self.orb_detector = cv2.ORB_create(
|
||||
nfeatures=2000,
|
||||
scaleFactor=1.2,
|
||||
nlevels=8,
|
||||
edgeThreshold=31,
|
||||
firstLevel=0,
|
||||
WTA_K=2,
|
||||
patchSize=31,
|
||||
fastThreshold=20
|
||||
)
|
||||
|
||||
self.bf_matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
|
||||
|
||||
# Предыдущее изображение
|
||||
self.prev_image: Optional[np.ndarray] = None
|
||||
self.prev_timestamp: float = 0.0
|
||||
|
||||
# Счетчики
|
||||
self.frame_count = 0
|
||||
self.successful_tracking_frames = 0
|
||||
|
||||
# Статистика
|
||||
self.tracking_stats = {
|
||||
'total_frames': 0,
|
||||
'successful_frames': 0,
|
||||
'failed_frames': 0,
|
||||
'avg_processing_time': 0.0
|
||||
}
|
||||
|
||||
def update_pixel_to_meter_ratio(self):
|
||||
"""Обновляет коэффициент перевода пикселей в метры на основе высоты и FOV камеры"""
|
||||
# Вычисляем размер пикселя на земле при текущей высоте
|
||||
ground_distance = 2 * self.state.altitude * math.tan(self.camera_fov / 2)
|
||||
self.pixel_to_meter_ratio = ground_distance / self.image_width
|
||||
|
||||
def detect_and_match_keypoints(self, img1: np.ndarray, img2: np.ndarray) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[List], Optional[List], Optional[List]]:
|
||||
"""Обнаруживает и сопоставляет ключевые точки между двумя изображениями"""
|
||||
# Обнаружение ключевых точек и дескрипторов
|
||||
kp1, des1 = self.orb_detector.detectAndCompute(img1, None)
|
||||
kp2, des2 = self.orb_detector.detectAndCompute(img2, None)
|
||||
|
||||
if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10:
|
||||
return None, None, None, None, None
|
||||
|
||||
# Сопоставление ключевых точек
|
||||
matches = self.bf_matcher.match(des1, des2)
|
||||
matches = sorted(matches, key=lambda x: x.distance)
|
||||
|
||||
# Фильтрация хороших совпадений
|
||||
good_matches = []
|
||||
for match in matches:
|
||||
if match.distance < 50: # Порог расстояния
|
||||
good_matches.append(match)
|
||||
|
||||
if len(good_matches) < 8: # Увеличиваем минимальное количество совпадений
|
||||
return None, None, None, None, None
|
||||
|
||||
# Извлечение координат сопоставленных точек
|
||||
src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
|
||||
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
|
||||
|
||||
return src_pts, dst_pts, good_matches, kp1, kp2
|
||||
|
||||
def estimate_transformation_matrix(self, src_pts: np.ndarray, dst_pts: np.ndarray) -> Optional[Dict]:
|
||||
"""Оценивает матрицу трансформации с улучшенной обработкой ошибок"""
|
||||
try:
|
||||
# Используем RANSAC для оценки матрицы гомографии
|
||||
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
|
||||
|
||||
if H is None:
|
||||
return None
|
||||
|
||||
# Проверяем качество матрицы
|
||||
if not self.is_valid_homography(H):
|
||||
return None
|
||||
|
||||
# Извлекаем параметры трансформации
|
||||
a11, a12 = H[0, 0], H[0, 1]
|
||||
a21, a22 = H[1, 0], H[1, 1]
|
||||
tx, ty = H[0, 2], H[1, 2]
|
||||
|
||||
# Вычисляем угол поворота
|
||||
angle = np.arctan2(a21, a11)
|
||||
|
||||
# Вычисляем масштаб
|
||||
scale_x = np.sqrt(a11**2 + a21**2)
|
||||
scale_y = np.sqrt(a12**2 + a22**2)
|
||||
scale = (scale_x + scale_y) / 2
|
||||
|
||||
# Проверяем разумность масштаба
|
||||
if scale < 0.5 or scale > 2.0:
|
||||
return None
|
||||
|
||||
return {
|
||||
'translation': (tx, ty),
|
||||
'rotation': angle,
|
||||
'scale': scale,
|
||||
'homography': H,
|
||||
'mask': mask,
|
||||
'inliers_ratio': np.sum(mask) / len(mask) if mask is not None else 0.0
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Ошибка при оценке трансформации: {e}")
|
||||
return None
|
||||
|
||||
def is_valid_homography(self, H: np.ndarray) -> bool:
|
||||
"""Проверяет валидность матрицы гомографии"""
|
||||
if H is None:
|
||||
return False
|
||||
|
||||
# Проверяем, что матрица не вырождена
|
||||
det = np.linalg.det(H)
|
||||
if abs(det) < 1e-6:
|
||||
return False
|
||||
|
||||
# Проверяем, что элементы матрицы разумны
|
||||
if np.any(np.abs(H) > 1000):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def update_drone_position(self, transformation_info: Dict, dt: float):
|
||||
"""Обновляет позицию и состояние БПЛА с учетом временного интервала"""
|
||||
tx, ty = transformation_info['translation']
|
||||
rotation = transformation_info['rotation']
|
||||
scale = transformation_info['scale']
|
||||
inliers_ratio = transformation_info.get('inliers_ratio', 1.0)
|
||||
|
||||
# Конвертируем смещение в пикселях в метры
|
||||
dx_meters = tx * self.pixel_to_meter_ratio
|
||||
dy_meters = ty * self.pixel_to_meter_ratio
|
||||
|
||||
# Применяем поворот к смещению (учитываем текущий угол БПЛА)
|
||||
cos_angle = math.cos(self.state.angle)
|
||||
sin_angle = math.sin(self.state.angle)
|
||||
|
||||
# Поворачиваем смещение в глобальные координаты
|
||||
dx_global = dx_meters * cos_angle - dy_meters * sin_angle
|
||||
dy_global = dx_meters * sin_angle + dy_meters * cos_angle
|
||||
|
||||
# Вычисляем скорости
|
||||
velocity_x = dx_global / dt if dt > 0 else 0.0
|
||||
velocity_y = dy_global / dt if dt > 0 else 0.0
|
||||
angular_velocity = rotation / dt if dt > 0 else 0.0
|
||||
|
||||
# Применяем фильтрацию
|
||||
self.state.velocity_x = (self.velocity_filter_alpha * velocity_x +
|
||||
(1 - self.velocity_filter_alpha) * self.state.velocity_x)
|
||||
self.state.velocity_y = (self.velocity_filter_alpha * velocity_y +
|
||||
(1 - self.velocity_filter_alpha) * self.state.velocity_y)
|
||||
self.state.angular_velocity = (self.velocity_filter_alpha * angular_velocity +
|
||||
(1 - self.velocity_filter_alpha) * self.state.angular_velocity)
|
||||
|
||||
# Обновляем позицию с фильтрацией
|
||||
new_x = self.state.x + dx_global
|
||||
new_y = self.state.y + dy_global
|
||||
new_angle = self.state.angle + rotation
|
||||
|
||||
self.state.x = (self.position_filter_alpha * new_x +
|
||||
(1 - self.position_filter_alpha) * self.state.x)
|
||||
self.state.y = (self.position_filter_alpha * new_y +
|
||||
(1 - self.position_filter_alpha) * self.state.y)
|
||||
self.state.angle = (self.position_filter_alpha * new_angle +
|
||||
(1 - self.position_filter_alpha) * self.state.angle)
|
||||
|
||||
# Нормализуем угол
|
||||
self.state.angle = math.atan2(math.sin(self.state.angle), math.cos(self.state.angle))
|
||||
|
||||
# Обновляем временную метку и уверенность
|
||||
self.state.timestamp = time.time()
|
||||
self.state.confidence = min(1.0, inliers_ratio * 1.5) # Увеличиваем уверенность при хороших совпадениях
|
||||
|
||||
def handle(self, image: Image.Image) -> bool:
|
||||
"""Обрабатывает новый кадр и обновляет состояние БПЛА"""
|
||||
start_time = time.time()
|
||||
self.frame_count += 1
|
||||
self.tracking_stats['total_frames'] += 1
|
||||
|
||||
# Конвертируем изображение
|
||||
current_image = np.array(image)
|
||||
current_timestamp = time.time()
|
||||
|
||||
if self.prev_image is None:
|
||||
self.prev_image = current_image
|
||||
self.prev_timestamp = current_timestamp
|
||||
return False
|
||||
|
||||
# Вычисляем временной интервал
|
||||
dt = current_timestamp - self.prev_timestamp
|
||||
|
||||
# Обнаруживаем и сопоставляем ключевые точки
|
||||
src_pts, dst_pts, matches, kp1, kp2 = self.detect_and_match_keypoints(self.prev_image, current_image)
|
||||
|
||||
if src_pts is not None and dst_pts is not None:
|
||||
# Оцениваем матрицу трансформации
|
||||
transformation_info = self.estimate_transformation_matrix(src_pts, dst_pts)
|
||||
|
||||
if transformation_info:
|
||||
# Обновляем позицию БПЛА
|
||||
self.update_drone_position(transformation_info, dt)
|
||||
|
||||
# Добавляем состояние в историю
|
||||
self.add_state_to_history()
|
||||
|
||||
# Обновляем статистику
|
||||
self.successful_tracking_frames += 1
|
||||
self.tracking_stats['successful_frames'] += 1
|
||||
|
||||
# Выводим информацию
|
||||
self.print_tracking_info(transformation_info)
|
||||
|
||||
# Визуализация
|
||||
self.visualize_tracking(current_image, kp1, kp2, matches, transformation_info)
|
||||
|
||||
success = True
|
||||
else:
|
||||
self.tracking_stats['failed_frames'] += 1
|
||||
success = False
|
||||
else:
|
||||
self.tracking_stats['failed_frames'] += 1
|
||||
success = False
|
||||
|
||||
# Обновляем предыдущее изображение
|
||||
self.prev_image = current_image
|
||||
self.prev_timestamp = current_timestamp
|
||||
|
||||
# Обновляем статистику времени обработки
|
||||
processing_time = time.time() - start_time
|
||||
self.tracking_stats['avg_processing_time'] = (
|
||||
(self.tracking_stats['avg_processing_time'] * (self.frame_count - 1) + processing_time) / self.frame_count
|
||||
)
|
||||
|
||||
return success
|
||||
|
||||
def add_state_to_history(self):
|
||||
"""Добавляет текущее состояние в историю"""
|
||||
# Создаем копию текущего состояния
|
||||
state_copy = DroneState(
|
||||
x=self.state.x,
|
||||
y=self.state.y,
|
||||
altitude=self.state.altitude,
|
||||
angle=self.state.angle,
|
||||
velocity_x=self.state.velocity_x,
|
||||
velocity_y=self.state.velocity_y,
|
||||
angular_velocity=self.state.angular_velocity,
|
||||
timestamp=self.state.timestamp,
|
||||
confidence=self.state.confidence
|
||||
)
|
||||
|
||||
self.state_history.append(state_copy)
|
||||
|
||||
# Ограничиваем размер истории
|
||||
if len(self.state_history) > self.max_history_size:
|
||||
self.state_history.pop(0)
|
||||
|
||||
def get_drone_state(self) -> Dict:
|
||||
"""Возвращает текущее состояние БПЛА в виде словаря"""
|
||||
return {
|
||||
'x': self.state.x,
|
||||
'y': self.state.y,
|
||||
'altitude': self.state.altitude,
|
||||
'angle': self.state.angle,
|
||||
'angle_degrees': math.degrees(self.state.angle),
|
||||
'velocity_x': self.state.velocity_x,
|
||||
'velocity_y': self.state.velocity_y,
|
||||
'angular_velocity': self.state.angular_velocity,
|
||||
'confidence': self.state.confidence,
|
||||
'frame_count': self.frame_count,
|
||||
'timestamp': self.state.timestamp
|
||||
}
|
||||
|
||||
def get_tracking_stats(self) -> Dict:
|
||||
"""Возвращает статистику отслеживания"""
|
||||
success_rate = (self.tracking_stats['successful_frames'] /
|
||||
max(1, self.tracking_stats['total_frames'])) * 100
|
||||
|
||||
return {
|
||||
**self.tracking_stats,
|
||||
'success_rate_percent': success_rate,
|
||||
'current_altitude': self.state.altitude,
|
||||
'pixel_to_meter_ratio': self.pixel_to_meter_ratio
|
||||
}
|
||||
|
||||
def set_altitude(self, altitude: float):
|
||||
"""Устанавливает высоту БПЛА и пересчитывает масштаб"""
|
||||
self.state.altitude = altitude
|
||||
self.update_pixel_to_meter_ratio()
|
||||
|
||||
def reset_position(self, x: float = 0.0, y: float = 0.0, angle: float = 0.0):
|
||||
"""Сбрасывает позицию и угол БПЛА"""
|
||||
self.state.x = x
|
||||
self.state.y = y
|
||||
self.state.angle = angle
|
||||
self.state.velocity_x = 0.0
|
||||
self.state.velocity_y = 0.0
|
||||
self.state.angular_velocity = 0.0
|
||||
self.state_history.clear()
|
||||
self.frame_count = 0
|
||||
self.successful_tracking_frames = 0
|
||||
|
||||
def print_tracking_info(self, transformation_info: Dict):
|
||||
"""Выводит информацию об отслеживании"""
|
||||
state = self.get_drone_state()
|
||||
stats = self.get_tracking_stats()
|
||||
|
||||
print(f"Frame {self.frame_count}:")
|
||||
print(f" Position: ({state['x']:.3f}, {state['y']:.3f}) m")
|
||||
print(f" Angle: {state['angle_degrees']:.1f}°")
|
||||
print(f" Velocity: ({state['velocity_x']:.2f}, {state['velocity_y']:.2f}) m/s")
|
||||
print(f" Confidence: {state['confidence']:.2f}")
|
||||
print(f" Success Rate: {stats['success_rate_percent']:.1f}%")
|
||||
|
||||
def visualize_tracking(self, current_image: np.ndarray, kp1, kp2, matches, transformation_info: Dict):
|
||||
"""Визуализирует процесс отслеживания"""
|
||||
if kp1 is None or kp2 is None or matches is None:
|
||||
return
|
||||
|
||||
# Рисуем сопоставления
|
||||
img_matches = cv2.drawMatches(self.prev_image, kp1, current_image, kp2, matches, None,
|
||||
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
|
||||
|
||||
# Добавляем информацию о состоянии
|
||||
state = self.get_drone_state()
|
||||
stats = self.get_tracking_stats()
|
||||
|
||||
info_lines = [
|
||||
f"Position: ({state['x']:.2f}, {state['y']:.2f}) m",
|
||||
f"Angle: {state['angle_degrees']:.1f}°",
|
||||
f"Velocity: ({state['velocity_x']:.2f}, {state['velocity_y']:.2f}) m/s",
|
||||
f"Altitude: {state['altitude']:.1f} m",
|
||||
f"Confidence: {state['confidence']:.2f}",
|
||||
f"Success Rate: {stats['success_rate_percent']:.1f}%"
|
||||
]
|
||||
|
||||
for i, line in enumerate(info_lines):
|
||||
cv2.putText(img_matches, line, (10, 30 + i * 25),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
||||
|
||||
cv2.imshow('Advanced Drone Tracking', img_matches)
|
||||
cv2.waitKey(1)
|
||||
|
||||
def act(self) -> float:
|
||||
"""Возвращает угол поворота для управления дроном"""
|
||||
return self.state.angle
|
||||
|
||||
def get_trajectory_history(self) -> Tuple[List[float], List[float], List[float]]:
|
||||
"""Возвращает историю траектории"""
|
||||
x_history = [state.x for state in self.state_history]
|
||||
y_history = [state.y for state in self.state_history]
|
||||
angle_history = [math.degrees(state.angle) for state in self.state_history]
|
||||
|
||||
return x_history, y_history, angle_history
|
||||
Binary file not shown.
222
old/examples.py
222
old/examples.py
@@ -1,222 +0,0 @@
|
||||
"""
|
||||
Примеры использования системы отслеживания БПЛА
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from autopilot import AutoPilot
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def example_basic_usage():
|
||||
"""Базовый пример использования автопилота"""
|
||||
print("=== Базовый пример использования ===")
|
||||
|
||||
# Создаем автопилот с начальными параметрами
|
||||
autopilot = AutoPilot(
|
||||
initial_x=0.0, # Начальная X координата
|
||||
initial_y=0.0, # Начальная Y координата
|
||||
initial_altitude=15.0 # Начальная высота
|
||||
)
|
||||
|
||||
# Создаем тестовые изображения
|
||||
img1 = create_simple_test_image()
|
||||
img2 = create_simple_test_image(offset_x=20, offset_y=10)
|
||||
|
||||
# Обрабатываем изображения
|
||||
autopilot.handle(img1)
|
||||
autopilot.handle(img2)
|
||||
|
||||
# Получаем состояние
|
||||
state = autopilot.get_drone_state()
|
||||
print(f"Позиция БПЛА: ({state['x']:.2f}, {state['y']:.2f})")
|
||||
print(f"Угол БПЛА: {state['angle_degrees']:.1f}°")
|
||||
print(f"Высота: {state['altitude']:.1f}м")
|
||||
print(f"Обработано кадров: {state['frame_count']}")
|
||||
|
||||
def example_altitude_change():
|
||||
"""Пример изменения высоты и пересчета масштаба"""
|
||||
print("\n=== Пример изменения высоты ===")
|
||||
|
||||
autopilot = AutoPilot(initial_altitude=10.0)
|
||||
|
||||
# Создаем изображения
|
||||
img1 = create_simple_test_image()
|
||||
img2 = create_simple_test_image(offset_x=10, offset_y=5)
|
||||
|
||||
# Обрабатываем при высоте 10м
|
||||
autopilot.handle(img1)
|
||||
autopilot.handle(img2)
|
||||
|
||||
state1 = autopilot.get_drone_state()
|
||||
print(f"При высоте 10м: ({state1['x']:.2f}, {state1['y']:.2f})")
|
||||
|
||||
# Изменяем высоту на 20м
|
||||
autopilot.set_altitude(20.0)
|
||||
|
||||
# Обрабатываем те же изображения при новой высоте
|
||||
autopilot.reset_position()
|
||||
autopilot.handle(img1)
|
||||
autopilot.handle(img2)
|
||||
|
||||
state2 = autopilot.get_drone_state()
|
||||
print(f"При высоте 20м: ({state2['x']:.2f}, {state2['y']:.2f})")
|
||||
|
||||
# Коэффициент должен измениться в 2 раза
|
||||
ratio = state2['x'] / state1['x'] if state1['x'] != 0 else 0
|
||||
print(f"Коэффициент изменения: {ratio:.2f} (ожидается ~2.0)")
|
||||
|
||||
def example_circular_movement():
|
||||
"""Пример отслеживания кругового движения"""
|
||||
print("\n=== Пример кругового движения ===")
|
||||
|
||||
autopilot = AutoPilot()
|
||||
|
||||
# Создаем базовое изображение
|
||||
base_img = create_simple_test_image()
|
||||
|
||||
# Параметры кругового движения
|
||||
radius = 30
|
||||
center_x, center_y = 320, 240
|
||||
num_steps = 8
|
||||
|
||||
x_history = []
|
||||
y_history = []
|
||||
angle_history = []
|
||||
|
||||
# Симулируем круговое движение
|
||||
for i in range(num_steps):
|
||||
angle = 2 * np.pi * i / num_steps
|
||||
|
||||
# Вычисляем смещение
|
||||
dx = radius * np.cos(angle)
|
||||
dy = radius * np.sin(angle)
|
||||
|
||||
# Создаем смещенное изображение
|
||||
current_img = create_simple_test_image(
|
||||
offset_x=int(dx),
|
||||
offset_y=int(dy),
|
||||
rotation=angle * 0.1
|
||||
)
|
||||
|
||||
# Обрабатываем
|
||||
autopilot.handle(current_img)
|
||||
|
||||
# Сохраняем историю
|
||||
state = autopilot.get_drone_state()
|
||||
x_history.append(state['x'])
|
||||
y_history.append(state['y'])
|
||||
angle_history.append(state['angle_degrees'])
|
||||
|
||||
print(f"Шаг {i+1}: ({state['x']:.2f}, {state['y']:.2f}), угол: {state['angle_degrees']:.1f}°")
|
||||
|
||||
# Визуализируем траекторию
|
||||
plot_circular_trajectory(x_history, y_history, angle_history)
|
||||
|
||||
def example_position_reset():
|
||||
"""Пример сброса позиции"""
|
||||
print("\n=== Пример сброса позиции ===")
|
||||
|
||||
autopilot = AutoPilot()
|
||||
|
||||
# Создаем и обрабатываем изображения
|
||||
img1 = create_simple_test_image()
|
||||
img2 = create_simple_test_image(offset_x=15, offset_y=10)
|
||||
|
||||
autopilot.handle(img1)
|
||||
autopilot.handle(img2)
|
||||
|
||||
state_before = autopilot.get_drone_state()
|
||||
print(f"До сброса: ({state_before['x']:.2f}, {state_before['y']:.2f})")
|
||||
|
||||
# Сбрасываем позицию
|
||||
autopilot.reset_position(x=100.0, y=200.0, angle=np.pi/4)
|
||||
|
||||
state_after = autopilot.get_drone_state()
|
||||
print(f"После сброса: ({state_after['x']:.2f}, {state_after['y']:.2f}), угол: {state_after['angle_degrees']:.1f}°")
|
||||
|
||||
def create_simple_test_image(offset_x=0, offset_y=0, rotation=0):
|
||||
"""Создает простое тестовое изображение с заданным смещением"""
|
||||
width, height = 640, 480
|
||||
|
||||
# Создаем базовое изображение с геометрическими фигурами
|
||||
img = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
|
||||
# Добавляем фигуры
|
||||
cv2.rectangle(img, (100, 100), (200, 200), (255, 0, 0), -1)
|
||||
cv2.circle(img, (400, 300), 50, (0, 255, 0), -1)
|
||||
cv2.line(img, (50, 400), (550, 400), (0, 0, 255), 5)
|
||||
cv2.rectangle(img, (300, 150), (350, 200), (255, 255, 0), -1)
|
||||
|
||||
# Применяем трансформацию
|
||||
if offset_x != 0 or offset_y != 0 or rotation != 0:
|
||||
cos_rot = np.cos(rotation)
|
||||
sin_rot = np.sin(rotation)
|
||||
|
||||
M = np.float32([
|
||||
[cos_rot, -sin_rot, offset_x],
|
||||
[sin_rot, cos_rot, offset_y]
|
||||
])
|
||||
|
||||
img = cv2.warpAffine(img, M, (width, height),
|
||||
borderMode=cv2.BORDER_REPLICATE)
|
||||
|
||||
return Image.fromarray(img)
|
||||
|
||||
def plot_circular_trajectory(x_history, y_history, angle_history):
|
||||
"""Визуализирует круговую траекторию"""
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
|
||||
|
||||
# График траектории
|
||||
ax1.plot(x_history, y_history, 'b-o', linewidth=2, markersize=8)
|
||||
ax1.plot(x_history[0], y_history[0], 'go', markersize=12, label='Начало')
|
||||
ax1.plot(x_history[-1], y_history[-1], 'ro', markersize=12, label='Конец')
|
||||
ax1.set_xlabel('X координата (м)')
|
||||
ax1.set_ylabel('Y координата (м)')
|
||||
ax1.set_title('Круговая траектория БПЛА')
|
||||
ax1.grid(True)
|
||||
ax1.legend()
|
||||
ax1.axis('equal')
|
||||
|
||||
# График угла
|
||||
ax2.plot(range(len(angle_history)), angle_history, 'r-o', linewidth=2, markersize=8)
|
||||
ax2.set_xlabel('Номер шага')
|
||||
ax2.set_ylabel('Угол поворота (градусы)')
|
||||
ax2.set_title('Изменение угла поворота')
|
||||
ax2.grid(True)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def example_error_handling():
|
||||
"""Пример обработки ошибок и граничных случаев"""
|
||||
print("\n=== Пример обработки ошибок ===")
|
||||
|
||||
autopilot = AutoPilot()
|
||||
|
||||
# Создаем изображение без текстуры (однородное)
|
||||
blank_img = Image.fromarray(np.zeros((480, 640, 3), dtype=np.uint8))
|
||||
|
||||
# Обрабатываем - должно работать без ошибок
|
||||
try:
|
||||
autopilot.handle(blank_img)
|
||||
autopilot.handle(blank_img)
|
||||
print("Обработка однородного изображения прошла успешно")
|
||||
except Exception as e:
|
||||
print(f"Ошибка при обработке: {e}")
|
||||
|
||||
# Проверяем состояние
|
||||
state = autopilot.get_drone_state()
|
||||
print(f"Состояние после обработки: ({state['x']:.2f}, {state['y']:.2f})")
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Примеры использования системы отслеживания БПЛА\n")
|
||||
|
||||
# Запускаем все примеры
|
||||
example_basic_usage()
|
||||
example_altitude_change()
|
||||
example_circular_movement()
|
||||
example_position_reset()
|
||||
example_error_handling()
|
||||
|
||||
print("\nВсе примеры выполнены успешно!")
|
||||
13
old/main.py
13
old/main.py
@@ -1,13 +0,0 @@
|
||||
from autopilot import AutoPilot, RandomPilot
|
||||
from simulator import Simulator
|
||||
from visualization import VisualizationManager
|
||||
|
||||
# Создаем менеджер визуализации
|
||||
viz_manager = VisualizationManager("Drone Autopilot - Global Map & Detection")
|
||||
|
||||
# Создаем симулятор с AutoPilot для обработки изображений
|
||||
# Передаем менеджер визуализации в автопилот
|
||||
simulator = Simulator(RandomPilot(), AutoPilot(viz_manager=viz_manager), viz_manager=viz_manager)
|
||||
|
||||
# Запускаем симуляцию
|
||||
simulator.loop()
|
||||
@@ -1,56 +0,0 @@
|
||||
asttokens==3.0.0
|
||||
attrs==25.3.0
|
||||
certifi==2025.1.31
|
||||
cffi==1.17.1
|
||||
colorama==0.4.6
|
||||
comm==0.2.2
|
||||
contourpy==1.3.1
|
||||
cycler==0.12.1
|
||||
debugpy==1.8.14
|
||||
decorator==5.2.1
|
||||
executing==2.2.0
|
||||
fonttools==4.57.0
|
||||
h11==0.14.0
|
||||
idna==3.10
|
||||
ipykernel==6.29.5
|
||||
ipython==9.3.0
|
||||
ipython_pygments_lexers==1.1.1
|
||||
jedi==0.19.2
|
||||
jupyter_client==8.6.3
|
||||
jupyter_core==5.8.1
|
||||
kiwisolver==1.4.8
|
||||
matplotlib==3.10.1
|
||||
matplotlib-inline==0.1.7
|
||||
nest-asyncio==1.6.0
|
||||
numpy==2.2.4
|
||||
opencv-python==4.11.0.86
|
||||
outcome==1.3.0.post0
|
||||
packaging==24.2
|
||||
parso==0.8.4
|
||||
pillow==11.2.1
|
||||
platformdirs==4.3.8
|
||||
prompt_toolkit==3.0.51
|
||||
psutil==7.0.0
|
||||
pure_eval==0.2.3
|
||||
pycparser==2.22
|
||||
Pygments==2.19.2
|
||||
pyparsing==3.2.3
|
||||
PySocks==1.7.1
|
||||
python-dateutil==2.9.0.post0
|
||||
pywin32==310
|
||||
pyzmq==27.0.0
|
||||
scipy==1.15.2
|
||||
selenium==4.31.0
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
sortedcontainers==2.4.0
|
||||
stack-data==0.6.3
|
||||
tornado==6.5.1
|
||||
traitlets==5.14.3
|
||||
trio==0.29.0
|
||||
trio-websocket==0.12.2
|
||||
typing_extensions==4.13.2
|
||||
urllib3==2.4.0
|
||||
wcwidth==0.2.13
|
||||
websocket-client==1.8.0
|
||||
wsproto==1.2.0
|
||||
173
old/simulator.py
173
old/simulator.py
@@ -1,173 +0,0 @@
|
||||
import math
|
||||
from io import BytesIO
|
||||
from time import sleep
|
||||
|
||||
from PIL import Image
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
|
||||
from autopilot import Pilot
|
||||
from visualization import VisualizationManager, SimMode
|
||||
|
||||
import os
|
||||
|
||||
class Simulator:
|
||||
driver: webdriver.Chrome
|
||||
mode: SimMode
|
||||
|
||||
operatorPilot: Pilot
|
||||
autonomePilot: Pilot
|
||||
|
||||
angle: float
|
||||
|
||||
# Менеджер визуализации
|
||||
viz_manager: VisualizationManager
|
||||
current_x: float
|
||||
current_y: float
|
||||
|
||||
def __init__(self, operatorPilot: Pilot, autonomePilot: Pilot, viz_manager: VisualizationManager = None):
|
||||
self.mode = SimMode.OPERATOR
|
||||
self.operatorPilot = operatorPilot
|
||||
self.autonomePilot = autonomePilot
|
||||
|
||||
# Инициализация переменных для траектории
|
||||
self.current_x = 0.0
|
||||
self.current_y = 0.0
|
||||
|
||||
# Создаем менеджер визуализации
|
||||
self.viz_manager = viz_manager
|
||||
|
||||
# Передаем менеджер визуализации в автопилот, если он поддерживает это
|
||||
if hasattr(self.autonomePilot, 'viz_manager'):
|
||||
self.autonomePilot.viz_manager = self.viz_manager
|
||||
|
||||
# Создаем папку для изображений, если её нет
|
||||
os.makedirs('./images', exist_ok=True)
|
||||
|
||||
options = webdriver.ChromeOptions()
|
||||
# options.add_experimental_option("detach", True)
|
||||
self.driver = webdriver.Chrome(options)
|
||||
self.driver.get("https://yandex.ru/maps/43/kazan/?ll=49.103814%2C55.794258&z=14")
|
||||
sleep(2)
|
||||
|
||||
action = ActionChains(self.driver)
|
||||
|
||||
# Закрытие левой панели
|
||||
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
|
||||
action.move_to_element_with_offset(self.driver.find_element(By.XPATH, "//div[@class='rounded-controls']/div[@class='rounded-controls__child'][5]//button"), 5, 5)
|
||||
action.perform()
|
||||
|
||||
# Режим спутника
|
||||
sleep(1)
|
||||
action.move_to_element_with_offset(self.driver.find_element(By.XPATH, "//div[@class='rounded-controls']/div[@class='rounded-controls__child'][5]//button"), -500, -500)
|
||||
action.click()
|
||||
action.perform()
|
||||
|
||||
self.angle = 0
|
||||
|
||||
def rotate_image_like_drone(self, image: Image.Image, angle: float) -> Image.Image:
|
||||
"""
|
||||
Поворачивает картинку как будто съемка ведется с летящего дрона.
|
||||
Выделяет концентрический квадрат, поворачивает его и извлекает результат.
|
||||
"""
|
||||
# Получаем размеры изображения
|
||||
width, height = image.size
|
||||
square_size = min(width, height)
|
||||
cropped_image = image.crop((0, 0, square_size, square_size))
|
||||
cropped_image = cropped_image.rotate(angle / math.pi * 180, expand=True)
|
||||
|
||||
# Определяем размер концентрического квадрата (80% от минимальной стороны)
|
||||
local_square_size = int(square_size / 2 ** 0.5)
|
||||
|
||||
# Вычисляем координаты для центрирования квадрата
|
||||
left = (cropped_image.width - local_square_size) // 2
|
||||
top = (cropped_image.height - local_square_size) // 2
|
||||
right = left + local_square_size
|
||||
bottom = top + local_square_size
|
||||
|
||||
# Вырезаем концентрический квадрат
|
||||
final_image = cropped_image.crop((left, top, right, bottom))
|
||||
|
||||
return final_image
|
||||
|
||||
def update_trajectory(self, dx: float, dy: float):
|
||||
"""
|
||||
Обновляет траекторию полета беспилотника
|
||||
"""
|
||||
# Обновляем текущие координаты
|
||||
self.current_x += dx
|
||||
self.current_y += dy
|
||||
|
||||
def update_map(self):
|
||||
"""
|
||||
Обновляет карту траектории полета
|
||||
"""
|
||||
self.viz_manager.update_global_map(self.current_x, self.current_y, self.mode)
|
||||
|
||||
def loop(self):
|
||||
|
||||
html = self.driver.find_element(By.TAG_NAME, 'html')
|
||||
action = ActionChains(self.driver)
|
||||
|
||||
# Добавляем начальную точку в траекторию
|
||||
self.update_trajectory(0, 0)
|
||||
self.viz_manager.update_global_map(self.current_x, self.current_y, self.mode)
|
||||
|
||||
for i in range(1000):
|
||||
signal = None
|
||||
if self.mode == SimMode.OPERATOR:
|
||||
signal = self.operatorPilot.act()
|
||||
if signal is None:
|
||||
self.mode = SimMode.AUTONOME
|
||||
print("Режим возвращения домой!")
|
||||
|
||||
if self.mode == SimMode.AUTONOME:
|
||||
signal = self.autonomePilot.act()
|
||||
|
||||
if signal is None:
|
||||
break
|
||||
|
||||
dangle, velocity = signal
|
||||
drone_x, drone_y = self.autonomePilot.get_position()
|
||||
self.viz_manager.update_error_plot(i, drone_x, drone_y, self.current_x, self.current_y)
|
||||
|
||||
# Сдвиг камеры
|
||||
action = ActionChains(self.driver)
|
||||
action.move_to_element_with_offset(html, 200, 200)
|
||||
action.click_and_hold()
|
||||
|
||||
self.angle += dangle
|
||||
dx = math.cos(self.angle) * velocity
|
||||
dy = math.sin(self.angle) * velocity
|
||||
action.move_by_offset(-dx, dy)
|
||||
action.release()
|
||||
action.perform()
|
||||
|
||||
# Обновляем траекторию
|
||||
self.update_trajectory(dx, dy)
|
||||
|
||||
# Загрузка скриншота
|
||||
png = self.driver.get_screenshot_as_png()
|
||||
im = Image.open(BytesIO(png))
|
||||
im = im.crop([0, 80, im.width-80, im.height-60])
|
||||
|
||||
# Применяем поворот как будто съемка с дрона
|
||||
rotated_im = self.rotate_image_like_drone(im, math.pi / 2 - self.angle)
|
||||
|
||||
# Передаем изображение в AutoPilot для анализа
|
||||
self.autonomePilot.handle(rotated_im)
|
||||
|
||||
# Обновляем визуализацию каждые несколько итераций для производительности
|
||||
self.update_map()
|
||||
self.viz_manager.update_display()
|
||||
|
||||
# Финальное обновление карты
|
||||
self.update_map()
|
||||
self.viz_manager.update_display()
|
||||
print("last position: ", self.driver.current_url)
|
||||
print(f"Финальная позиция: ({self.current_x:.2f}, {self.current_y:.2f})")
|
||||
|
||||
# Показываем карту до закрытия, но не поднимаем на передний план
|
||||
self.viz_manager.show_final()
|
||||
print("Симуляция завершена. Окно визуализации остается открытым для анализа.")
|
||||
@@ -1,55 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100.00078395707533 -0.0007839570753276348\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"h = 100\n",
|
||||
"R = 6378 * 1000\n",
|
||||
"\n",
|
||||
"def solve_x2(a, b, c):\n",
|
||||
" d = b ** 2 - 4 * a * c\n",
|
||||
" x1 = (-b - d ** 0.5) / (2 * a)\n",
|
||||
" x2 = (-b + d ** 0.5) / (2 * a)\n",
|
||||
" return x1, x2\n",
|
||||
"\n",
|
||||
"x1, x2 = solve_x2(2, -2 * (h + R), 2 * h * R + h ** 2)\n",
|
||||
"\n",
|
||||
"y = h - x1\n",
|
||||
"\n",
|
||||
"print(x1, y)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Тестовый скрипт для проверки работы с координатами относительно центра изображения
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from autopilot import AutoPilot
|
||||
|
||||
def test_center_coordinates():
|
||||
"""Тестирует работу с координатами относительно центра изображения"""
|
||||
print("Тест координат относительно центра изображения...")
|
||||
|
||||
# Создаем тестовое изображение
|
||||
width, height = 640, 480
|
||||
test_image = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
|
||||
# Добавляем некоторые объекты для отслеживания
|
||||
cv2.circle(test_image, (width//2, height//2), 50, (255, 255, 255), -1) # Центр
|
||||
cv2.circle(test_image, (100, 100), 20, (255, 0, 0), -1) # Левый верхний угол
|
||||
cv2.circle(test_image, (width-100, height-100), 20, (0, 255, 0), -1) # Правый нижний угол
|
||||
|
||||
# Создаем второе изображение с небольшим смещением
|
||||
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1) # Смещенный центр
|
||||
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1) # Смещенный левый верхний угол
|
||||
cv2.circle(test_image2, (width-90, height-95), 20, (0, 255, 0), -1) # Смещенный правый нижний угол
|
||||
|
||||
# Конвертируем в PIL Image
|
||||
pil_image1 = Image.fromarray(test_image)
|
||||
pil_image2 = Image.fromarray(test_image2)
|
||||
|
||||
# Создаем автопилот
|
||||
pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
|
||||
|
||||
print(f"Размер изображения: {width}x{height}")
|
||||
print(f"Ожидаемый центр: ({width//2}, {height//2})")
|
||||
|
||||
# Обрабатываем первое изображение
|
||||
pilot.handle(pil_image1)
|
||||
print(f"Центр изображения после первого кадра: {pilot.image_center}")
|
||||
|
||||
# Обрабатываем второе изображение
|
||||
pilot.handle(pil_image2)
|
||||
print(f"Центр изображения после второго кадра: {pilot.image_center}")
|
||||
|
||||
# Получаем состояние дрона
|
||||
drone_state = pilot.get_drone_state()
|
||||
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
|
||||
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
|
||||
|
||||
print("\nАнализ отцентрированных координат:")
|
||||
print("Ожидаемое смещение центра: (10, 5) пикселей")
|
||||
print("Ожидаемое смещение левого верхнего угла: (10, 5) пикселей")
|
||||
print("Ожидаемое смещение правого нижнего угла: (10, 5) пикселей")
|
||||
|
||||
print("Тест завершен!")
|
||||
|
||||
def test_keypoint_centering():
|
||||
"""Тестирует отцентрирование ключевых точек"""
|
||||
print("\nТест отцентрирования ключевых точек...")
|
||||
|
||||
# Создаем простое изображение с одной точкой
|
||||
width, height = 100, 100
|
||||
test_image = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
cv2.circle(test_image, (25, 25), 5, (255, 255, 255), -1) # Точка в (25, 25)
|
||||
|
||||
# Создаем второе изображение с той же точкой
|
||||
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
cv2.circle(test_image2, (25, 25), 5, (255, 255, 255), -1)
|
||||
|
||||
# Конвертируем в PIL Image
|
||||
pil_image1 = Image.fromarray(test_image)
|
||||
pil_image2 = Image.fromarray(test_image2)
|
||||
|
||||
# Создаем автопилот
|
||||
pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
|
||||
|
||||
print(f"Размер изображения: {width}x{height}")
|
||||
print(f"Центр изображения: ({width//2}, {height//2}) = (50, 50)")
|
||||
print(f"Позиция точки: (25, 25)")
|
||||
print(f"Ожидаемые отцентрированные координаты: (25-50, 25-50) = (-25, -25)")
|
||||
|
||||
# Обрабатываем изображения
|
||||
pilot.handle(pil_image1)
|
||||
pilot.handle(pil_image2)
|
||||
|
||||
drone_state = pilot.get_drone_state()
|
||||
print(f"Результат: позиция дрона ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
|
||||
|
||||
print("Тест завершен!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_center_coordinates()
|
||||
test_keypoint_centering()
|
||||
@@ -1,204 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Тестовый скрипт для проверки системы отслеживания траектории беспилотника
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from autopilot import AutoPilot, RandomPilot
|
||||
from simulator import Simulator, SimMode
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
def create_test_image(width=640, height=480, pattern_type="random"):
|
||||
"""Создает тестовое изображение с различными паттернами"""
|
||||
if pattern_type == "random":
|
||||
# Создаем изображение с случайными точками
|
||||
img = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
|
||||
# Добавляем несколько четких объектов
|
||||
cv2.rectangle(img, (100, 100), (200, 200), (255, 0, 0), -1)
|
||||
cv2.circle(img, (400, 300), 50, (0, 255, 0), -1)
|
||||
cv2.line(img, (50, 400), (550, 400), (0, 0, 255), 5)
|
||||
elif pattern_type == "grid":
|
||||
# Создаем сетку
|
||||
img = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
for i in range(0, width, 50):
|
||||
cv2.line(img, (i, 0), (i, height), (255, 255, 255), 1)
|
||||
for i in range(0, height, 50):
|
||||
cv2.line(img, (0, i), (width, i), (255, 255, 255), 1)
|
||||
elif pattern_type == "circles":
|
||||
# Создаем изображение с кругами
|
||||
img = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
for i in range(5):
|
||||
x = np.random.randint(50, width-50)
|
||||
y = np.random.randint(50, height-50)
|
||||
radius = np.random.randint(20, 80)
|
||||
color = (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))
|
||||
cv2.circle(img, (x, y), radius, color, -1)
|
||||
|
||||
return Image.fromarray(img)
|
||||
|
||||
def simulate_drone_movement(base_image, dx, dy, rotation=0):
|
||||
"""Симулирует движение БПЛА, создавая смещенное изображение"""
|
||||
img_array = np.array(base_image)
|
||||
height, width = img_array.shape[:2]
|
||||
|
||||
# Создаем матрицу трансформации
|
||||
cos_rot = np.cos(rotation)
|
||||
sin_rot = np.sin(rotation)
|
||||
|
||||
# Матрица поворота и смещения
|
||||
M = np.float32([
|
||||
[cos_rot, -sin_rot, dx],
|
||||
[sin_rot, cos_rot, dy]
|
||||
])
|
||||
|
||||
# Применяем трансформацию
|
||||
transformed = cv2.warpAffine(img_array, M, (width, height),
|
||||
borderMode=cv2.BORDER_REPLICATE)
|
||||
|
||||
return Image.fromarray(transformed)
|
||||
|
||||
def test_drone_tracking():
|
||||
"""Тестирует систему отслеживания БПЛА"""
|
||||
print("Запуск тестирования системы отслеживания БПЛА...")
|
||||
|
||||
# Создаем автопилот
|
||||
autopilot = AutoPilot(initial_x=0.0, initial_y=0.0, initial_altitude=10.0)
|
||||
|
||||
# Создаем базовое изображение
|
||||
base_image = create_test_image(pattern_type="circles")
|
||||
|
||||
# Траектория движения (dx, dy, rotation)
|
||||
trajectory = [
|
||||
(10, 0, 0), # Движение вперед
|
||||
(10, 5, 0.1), # Движение вперед с небольшим поворотом
|
||||
(0, 10, 0), # Движение вправо
|
||||
(-5, 5, -0.1), # Движение назад-вправо с поворотом
|
||||
(5, -5, 0.05), # Движение вперед-влево
|
||||
(0, -10, 0), # Движение влево
|
||||
(-10, 0, 0), # Движение назад
|
||||
]
|
||||
|
||||
# Массивы для хранения истории позиций
|
||||
x_history = []
|
||||
y_history = []
|
||||
angle_history = []
|
||||
|
||||
# Обрабатываем каждое изображение в траектории
|
||||
for i, (dx, dy, rotation) in enumerate(trajectory):
|
||||
print(f"\n--- Кадр {i+1} ---")
|
||||
|
||||
# Создаем смещенное изображение
|
||||
current_image = simulate_drone_movement(base_image, dx, dy, rotation)
|
||||
|
||||
# Обрабатываем изображение
|
||||
autopilot.handle(current_image)
|
||||
|
||||
# Получаем состояние БПЛА
|
||||
state = autopilot.get_drone_state()
|
||||
x_history.append(state['x'])
|
||||
y_history.append(state['y'])
|
||||
angle_history.append(state['angle_degrees'])
|
||||
|
||||
# Обновляем базовое изображение для следующего кадра
|
||||
base_image = current_image
|
||||
|
||||
# Небольшая пауза для визуализации
|
||||
time.sleep(0.5)
|
||||
|
||||
# Закрываем окно OpenCV
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
# Визуализируем траекторию
|
||||
plot_trajectory(x_history, y_history, angle_history)
|
||||
|
||||
def plot_trajectory(x_history, y_history, angle_history):
|
||||
"""Визуализирует траекторию движения БПЛА"""
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
|
||||
|
||||
# График траектории
|
||||
ax1.plot(x_history, y_history, 'b-o', linewidth=2, markersize=8)
|
||||
ax1.plot(x_history[0], y_history[0], 'go', markersize=12, label='Начало')
|
||||
ax1.plot(x_history[-1], y_history[-1], 'ro', markersize=12, label='Конец')
|
||||
ax1.set_xlabel('X координата (м)')
|
||||
ax1.set_ylabel('Y координата (м)')
|
||||
ax1.set_title('Траектория движения БПЛА')
|
||||
ax1.grid(True)
|
||||
ax1.legend()
|
||||
ax1.axis('equal')
|
||||
|
||||
# График угла поворота
|
||||
ax2.plot(range(len(angle_history)), angle_history, 'r-o', linewidth=2, markersize=8)
|
||||
ax2.set_xlabel('Номер кадра')
|
||||
ax2.set_ylabel('Угол поворота (градусы)')
|
||||
ax2.set_title('Изменение угла поворота БПЛА')
|
||||
ax2.grid(True)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def test_single_frame():
|
||||
"""Тестирует обработку одного кадра"""
|
||||
print("Тестирование обработки одного кадра...")
|
||||
|
||||
autopilot = AutoPilot()
|
||||
|
||||
# Создаем два похожих изображения
|
||||
img1 = create_test_image(pattern_type="circles")
|
||||
img2 = simulate_drone_movement(img1, 15, 10, 0.05)
|
||||
|
||||
# Обрабатываем первое изображение
|
||||
autopilot.handle(img1)
|
||||
|
||||
# Обрабатываем второе изображение
|
||||
autopilot.handle(img2)
|
||||
|
||||
# Получаем состояние
|
||||
state = autopilot.get_drone_state()
|
||||
print(f"Позиция БПЛА: ({state['x']:.2f}, {state['y']:.2f})")
|
||||
print(f"Угол БПЛА: {state['angle_degrees']:.1f}°")
|
||||
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
def test_trajectory_tracking():
|
||||
"""Тестирует систему отслеживания траектории"""
|
||||
print("Запуск теста системы отслеживания траектории...")
|
||||
|
||||
# Создаем пилотов
|
||||
operator_pilot = RandomPilot(velocity=1.0)
|
||||
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0)
|
||||
|
||||
# Создаем симулятор
|
||||
simulator = Simulator(operator_pilot, autonome_pilot)
|
||||
|
||||
try:
|
||||
# Запускаем симуляцию
|
||||
simulator.loop()
|
||||
|
||||
print("Симуляция завершена!")
|
||||
print(f"Всего точек траектории: {len(simulator.trajectory_x)}")
|
||||
print(f"Режим оператора: {sum(1 for mode in simulator.trajectory_modes if mode == SimMode.OPERATOR)} точек")
|
||||
print(f"Автономный режим: {sum(1 for mode in simulator.trajectory_modes if mode == SimMode.AUTONOME)} точек")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Ошибка во время симуляции: {e}")
|
||||
finally:
|
||||
# Закрываем браузер
|
||||
simulator.driver.quit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Выберите тест:")
|
||||
print("1. Тест одного кадра")
|
||||
print("2. Тест полной траектории")
|
||||
|
||||
choice = input("Введите номер теста (1 или 2): ").strip()
|
||||
|
||||
if choice == "1":
|
||||
test_single_frame()
|
||||
elif choice == "2":
|
||||
test_trajectory_tracking()
|
||||
else:
|
||||
print("Неверный выбор. Запускаю тест одного кадра...")
|
||||
test_single_frame()
|
||||
@@ -1,65 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Тестовый скрипт для проверки единой системы визуализации
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from autopilot import AutoPilot, RandomPilot
|
||||
from simulator import Simulator
|
||||
from visualization import VisualizationManager, SimMode
|
||||
|
||||
def test_unified_visualization():
|
||||
"""Тестирует единую систему визуализации"""
|
||||
print("Тест единой системы визуализации...")
|
||||
|
||||
# Создаем менеджер визуализации
|
||||
viz_manager = VisualizationManager("Test - Unified Visualization")
|
||||
|
||||
# Создаем автопилот с менеджером визуализации
|
||||
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0, viz_manager=viz_manager)
|
||||
|
||||
# Создаем симулятор
|
||||
simulator = Simulator(RandomPilot(), autonome_pilot)
|
||||
|
||||
# Создаем тестовые изображения
|
||||
width, height = 640, 480
|
||||
test_image1 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
|
||||
# Добавляем объекты для отслеживания
|
||||
cv2.circle(test_image1, (width//2, height//2), 50, (255, 255, 255), -1)
|
||||
cv2.circle(test_image1, (100, 100), 20, (255, 0, 0), -1)
|
||||
|
||||
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1)
|
||||
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1)
|
||||
|
||||
# Конвертируем в PIL Image
|
||||
pil_image1 = Image.fromarray(test_image1)
|
||||
pil_image2 = Image.fromarray(test_image2)
|
||||
|
||||
print("Обрабатываем первое изображение...")
|
||||
autonome_pilot.handle(pil_image1)
|
||||
|
||||
print("Обрабатываем второе изображение...")
|
||||
autonome_pilot.handle(pil_image2)
|
||||
|
||||
# Обновляем визуализацию
|
||||
viz_manager.update_display()
|
||||
|
||||
# Получаем состояние дрона
|
||||
drone_state = autonome_pilot.get_drone_state()
|
||||
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
|
||||
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
|
||||
|
||||
print("Тест завершен! Окно визуализации должно показывать:")
|
||||
print("- Глобальную карту с траекторией")
|
||||
print("- Детекцию ключевых точек")
|
||||
print("- Сопоставление точек между кадрами")
|
||||
|
||||
# Показываем финальное состояние
|
||||
viz_manager.show_final()
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_unified_visualization()
|
||||
@@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Упрощенный тест для проверки системы визуализации без браузера
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import cv2
|
||||
from autopilot import AutoPilot
|
||||
from visualization import VisualizationManager, SimMode
|
||||
|
||||
def test_visualization_only():
|
||||
"""Тестирует только систему визуализации без симулятора"""
|
||||
print("Тест системы визуализации...")
|
||||
|
||||
# Создаем менеджер визуализации
|
||||
viz_manager = VisualizationManager("Test - Visualization Only")
|
||||
|
||||
# Создаем автопилот с менеджером визуализации
|
||||
autonome_pilot = AutoPilot(initial_x=0.0, initial_y=0.0, viz_manager=viz_manager)
|
||||
|
||||
# Создаем тестовые изображения
|
||||
width, height = 640, 480
|
||||
test_image1 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
test_image2 = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
|
||||
# Добавляем объекты для отслеживания
|
||||
cv2.circle(test_image1, (width//2, height//2), 50, (255, 255, 255), -1)
|
||||
cv2.circle(test_image1, (100, 100), 20, (255, 0, 0), -1)
|
||||
cv2.circle(test_image1, (width-100, height-100), 20, (0, 255, 0), -1)
|
||||
|
||||
cv2.circle(test_image2, (width//2 + 10, height//2 + 5), 50, (255, 255, 255), -1)
|
||||
cv2.circle(test_image2, (110, 105), 20, (255, 0, 0), -1)
|
||||
cv2.circle(test_image2, (width-90, height-95), 20, (0, 255, 0), -1)
|
||||
|
||||
# Конвертируем в PIL Image
|
||||
pil_image1 = Image.fromarray(test_image1)
|
||||
pil_image2 = Image.fromarray(test_image2)
|
||||
|
||||
print("Обрабатываем первое изображение...")
|
||||
autonome_pilot.handle(pil_image1)
|
||||
|
||||
print("Обрабатываем второе изображение...")
|
||||
autonome_pilot.handle(pil_image2)
|
||||
|
||||
# Симулируем движение дрона
|
||||
print("Симулируем движение дрона...")
|
||||
for i in range(10):
|
||||
# Обновляем глобальную карту
|
||||
viz_manager.update_global_map(i * 10, i * 5, SimMode.OPERATOR if i < 5 else SimMode.AUTONOME)
|
||||
viz_manager.update_display()
|
||||
|
||||
# Получаем состояние дрона
|
||||
drone_state = autonome_pilot.get_drone_state()
|
||||
print(f"Позиция дрона: ({drone_state['x']:.2f}, {drone_state['y']:.2f})")
|
||||
print(f"Угол дрона: {drone_state['angle_degrees']:.1f}°")
|
||||
|
||||
print("Тест завершен! Окно визуализации должно показывать:")
|
||||
print("- Глобальную карту с траекторией (синий - оператор, красный - автономный)")
|
||||
print("- Детекцию ключевых точек на текущем кадре")
|
||||
print("- Сопоставление точек между кадрами")
|
||||
|
||||
# Показываем финальное состояние
|
||||
viz_manager.show_final()
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_visualization_only()
|
||||
@@ -1,277 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Модуль для управления общим окном визуализации
|
||||
"""
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
import numpy as np
|
||||
from enum import Enum
|
||||
import cv2
|
||||
from PIL import Image
|
||||
import matplotlib
|
||||
|
||||
# Настройки matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
plt.rcParams['figure.raise_window'] = False
|
||||
|
||||
class SimMode(Enum):
|
||||
OPERATOR = 1
|
||||
AUTONOME = 2
|
||||
|
||||
class VisualizationManager:
|
||||
"""
|
||||
Менеджер для управления общим окном визуализации
|
||||
"""
|
||||
|
||||
def __init__(self, window_title="Drone Autopilot Visualization"):
|
||||
self.window_title = window_title
|
||||
self.fig = None
|
||||
self.ax_error_plot = None # График погрешности позиции
|
||||
self.ax_global_map = None
|
||||
self.ax_detection = None
|
||||
self.ax_matches = None
|
||||
|
||||
# Данные для глобальной карты
|
||||
self.trajectory_x = []
|
||||
self.trajectory_y = []
|
||||
self.trajectory_modes = []
|
||||
self.current_x = 0.0
|
||||
self.current_y = 0.0
|
||||
|
||||
# Данные для траектории БПЛА (его собственное видение)
|
||||
self.drone_trajectory_x = []
|
||||
self.drone_trajectory_y = []
|
||||
|
||||
# Данные для графика погрешности
|
||||
self.error_times = []
|
||||
self.position_errors = []
|
||||
|
||||
# Данные для детекции
|
||||
self.current_frame = None
|
||||
self.keypoints = []
|
||||
self.matches = []
|
||||
|
||||
self._setup_window()
|
||||
|
||||
def _setup_window(self):
|
||||
"""Настраивает общее окно с несколькими областями"""
|
||||
plt.ion()
|
||||
self.fig = plt.figure(figsize=(16, 10))
|
||||
self.fig.canvas.manager.window.title(self.window_title)
|
||||
|
||||
# Открываем окно на полный экран
|
||||
self.fig.canvas.manager.window.state('zoomed')
|
||||
|
||||
# Создаем сетку 2x2 с разными размерами колонок
|
||||
gs = self.fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3, width_ratios=[1, 1])
|
||||
|
||||
# График погрешности позиции (левый верхний угол)
|
||||
self.ax_error_plot = self.fig.add_subplot(gs[0, 0])
|
||||
self.ax_error_plot.set_title('Погрешность позиции от времени')
|
||||
self.ax_error_plot.set_xlabel('Время (кадры)')
|
||||
self.ax_error_plot.set_ylabel('Погрешность (метры)')
|
||||
self.ax_error_plot.grid(True, alpha=0.3)
|
||||
|
||||
# Глобальная карта (левый нижний угол)
|
||||
self.ax_global_map = self.fig.add_subplot(gs[1, 0])
|
||||
self.ax_global_map.set_title('Global Map - Траектория полета беспилотника')
|
||||
self.ax_global_map.set_xlabel('X координата')
|
||||
self.ax_global_map.set_ylabel('Y координата')
|
||||
self.ax_global_map.grid(True, alpha=0.3)
|
||||
self.ax_global_map.axhline(y=0, color='k', linestyle='-', alpha=0.3)
|
||||
self.ax_global_map.axvline(x=0, color='k', linestyle='-', alpha=0.3)
|
||||
|
||||
# Детекция ключевых точек (правый верхний угол)
|
||||
self.ax_detection = self.fig.add_subplot(gs[0, 1])
|
||||
self.ax_detection.set_title('Keypoint Detection')
|
||||
self.ax_detection.axis('off')
|
||||
|
||||
# Сопоставление точек (правый нижний угол)
|
||||
self.ax_matches = self.fig.add_subplot(gs[1, 1])
|
||||
self.ax_matches.set_title('Feature Matching')
|
||||
self.ax_matches.axis('off')
|
||||
|
||||
# Настройки окна
|
||||
self.fig.canvas.manager.window.attributes('-topmost', False)
|
||||
|
||||
plt.tight_layout()
|
||||
|
||||
def update_global_map(self, x: float, y: float, mode: SimMode):
|
||||
"""Обновляет глобальную карту"""
|
||||
self.current_x = x
|
||||
self.current_y = y
|
||||
self.trajectory_x.append(x)
|
||||
self.trajectory_y.append(y)
|
||||
self.trajectory_modes.append(mode)
|
||||
|
||||
self.ax_global_map.clear()
|
||||
self.ax_global_map.set_title('Global Map - Траектория полета беспилотника')
|
||||
self.ax_global_map.set_xlabel('X координата')
|
||||
self.ax_global_map.set_ylabel('Y координата')
|
||||
self.ax_global_map.grid(True, alpha=0.3)
|
||||
self.ax_global_map.axhline(y=0, color='k', linestyle='-', alpha=0.3)
|
||||
self.ax_global_map.axvline(x=0, color='k', linestyle='-', alpha=0.3)
|
||||
|
||||
if len(self.trajectory_x) > 1:
|
||||
# Разделяем траекторию по режимам
|
||||
operator_indices = [i for i, m in enumerate(self.trajectory_modes) if m == SimMode.OPERATOR]
|
||||
autonome_indices = operator_indices[-1:] + [i for i, m in enumerate(self.trajectory_modes) if m == SimMode.AUTONOME]
|
||||
|
||||
# Рисуем траекторию оператора (синий цвет)
|
||||
if len(operator_indices) > 1:
|
||||
operator_x = [self.trajectory_x[i] for i in operator_indices]
|
||||
operator_y = [self.trajectory_y[i] for i in operator_indices]
|
||||
self.ax_global_map.plot(operator_x, operator_y, 'b-', linewidth=2, label='Режим оператора')
|
||||
|
||||
# Рисуем траекторию автономного режима (красный цвет)
|
||||
if len(autonome_indices) > 1:
|
||||
autonome_x = [self.trajectory_x[i] for i in autonome_indices]
|
||||
autonome_y = [self.trajectory_y[i] for i in autonome_indices]
|
||||
self.ax_global_map.plot(autonome_x, autonome_y, 'r-', linewidth=2, label='Автономный режим')
|
||||
|
||||
# Рисуем траекторию БПЛА (пунктирная линия, тонкая)
|
||||
if len(self.drone_trajectory_x) > 1:
|
||||
self.ax_global_map.plot(self.drone_trajectory_x, self.drone_trajectory_y,
|
||||
'g--', linewidth=1, alpha=0.7, label='Данные по одометрии')
|
||||
|
||||
# Рисуем начальную точку (зеленая)
|
||||
self.ax_global_map.plot(self.trajectory_x[0], self.trajectory_y[0], 'go', markersize=8, label='Начальная точка')
|
||||
|
||||
# Рисуем текущую позицию (черная)
|
||||
self.ax_global_map.plot(self.current_x, self.current_y, 'ko', markersize=6, label='Текущая позиция')
|
||||
|
||||
# Рисуем целевую точку (0, 0) - желтая
|
||||
self.ax_global_map.plot(0, 0, 'yo', markersize=8, label='Цель (0, 0)')
|
||||
|
||||
self.ax_global_map.legend()
|
||||
|
||||
# Автоматически масштабируем оси
|
||||
if len(self.trajectory_x) > 0:
|
||||
margin = 50
|
||||
x_min, x_max = min(self.trajectory_x), max(self.trajectory_x)
|
||||
y_min, y_max = min(self.trajectory_y), max(self.trajectory_y)
|
||||
|
||||
# Учитываем также траекторию БПЛА при масштабировании
|
||||
if len(self.drone_trajectory_x) > 0:
|
||||
x_min = min(x_min, min(self.drone_trajectory_x))
|
||||
x_max = max(x_max, max(self.drone_trajectory_x))
|
||||
y_min = min(y_min, min(self.drone_trajectory_y))
|
||||
y_max = max(y_max, max(self.drone_trajectory_y))
|
||||
|
||||
x_min = min(x_min, 0)
|
||||
x_max = max(x_max, 0)
|
||||
y_min = min(y_min, 0)
|
||||
y_max = max(y_max, 0)
|
||||
|
||||
self.ax_global_map.set_xlim(x_min - margin, x_max + margin)
|
||||
self.ax_global_map.set_ylim(y_min - margin, y_max + margin)
|
||||
|
||||
def update_drone_trajectory(self, drone_x: float, drone_y: float):
|
||||
"""Обновляет траекторию БПЛА (его собственное видение позиции)"""
|
||||
self.drone_trajectory_x.append(drone_x)
|
||||
self.drone_trajectory_y.append(drone_y)
|
||||
|
||||
def update_error_plot(self, frame_count: int, drone_x: float, drone_y: float, true_x: float, true_y: float):
|
||||
"""Обновляет график погрешности позиции"""
|
||||
# Вычисляем погрешность как расстояние между реальной и предполагаемой позицией
|
||||
error = np.sqrt((drone_x - true_x)**2 + (drone_y - true_y)**2)
|
||||
|
||||
self.error_times.append(frame_count)
|
||||
self.position_errors.append(error)
|
||||
|
||||
self.ax_error_plot.clear()
|
||||
self.ax_error_plot.set_title('Погрешность позиции от времени')
|
||||
self.ax_error_plot.set_xlabel('Время (кадры)')
|
||||
self.ax_error_plot.set_ylabel('Погрешность (метры)')
|
||||
self.ax_error_plot.grid(True, alpha=0.3)
|
||||
|
||||
if len(self.error_times) > 1:
|
||||
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-', linewidth=2)
|
||||
|
||||
# Автоматически масштабируем оси
|
||||
if len(self.position_errors) > 0:
|
||||
margin = 0.1
|
||||
error_min, error_max = min(self.position_errors), max(self.position_errors)
|
||||
if error_max > error_min:
|
||||
self.ax_error_plot.set_ylim(0, error_max + margin)
|
||||
else:
|
||||
self.ax_error_plot.set_ylim(0, 1)
|
||||
|
||||
def update_detection(self, image: np.ndarray, keypoints):
|
||||
"""Обновляет визуализацию детекции ключевых точек"""
|
||||
self.current_frame = image.copy()
|
||||
self.keypoints = keypoints
|
||||
|
||||
self.ax_detection.clear()
|
||||
self.ax_detection.set_title('Keypoint Detection')
|
||||
|
||||
if image is not None:
|
||||
# Конвертируем BGR в RGB для matplotlib
|
||||
if len(image.shape) == 3 and image.shape[2] == 3:
|
||||
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
else:
|
||||
image_rgb = image
|
||||
|
||||
self.ax_detection.imshow(image_rgb)
|
||||
|
||||
# Рисуем ключевые точки
|
||||
if keypoints:
|
||||
kp_coords = np.array([kp.pt for kp in keypoints])
|
||||
self.ax_detection.scatter(kp_coords[:, 0], kp_coords[:, 1],
|
||||
c='red', s=20, alpha=0.7, marker='o')
|
||||
|
||||
self.ax_detection.axis('off')
|
||||
|
||||
def update_matches(self, img1: np.ndarray, img2: np.ndarray,
|
||||
kp1, kp2, matches, transformation_info=None):
|
||||
"""Обновляет визуализацию сопоставления точек"""
|
||||
self.ax_matches.clear()
|
||||
self.ax_matches.set_title('Feature Matching')
|
||||
|
||||
if img1 is not None and img2 is not None and matches:
|
||||
# Рисуем сопоставления
|
||||
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches, None,
|
||||
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
|
||||
|
||||
# Конвертируем BGR в RGB
|
||||
if len(img_matches.shape) == 3 and img_matches.shape[2] == 3:
|
||||
img_matches_rgb = cv2.cvtColor(img_matches, cv2.COLOR_BGR2RGB)
|
||||
else:
|
||||
img_matches_rgb = img_matches
|
||||
|
||||
self.ax_matches.imshow(img_matches_rgb)
|
||||
|
||||
# Добавляем информацию о трансформации
|
||||
if transformation_info:
|
||||
tx, ty = transformation_info['translation']
|
||||
angle = transformation_info['rotation']
|
||||
|
||||
info_text = f"Translation: ({tx:.2f}, {ty:.2f})"
|
||||
info_text2 = f"Rotation: {angle:.2f} rad ({np.degrees(angle):.1f}°)"
|
||||
|
||||
self.ax_matches.text(10, 30, info_text, fontsize=8, color='green',
|
||||
bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
|
||||
self.ax_matches.text(10, 90, info_text2, fontsize=8, color='green',
|
||||
bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
|
||||
|
||||
self.ax_matches.axis('off')
|
||||
|
||||
def update_display(self):
|
||||
"""Обновляет отображение всех областей"""
|
||||
self.fig.canvas.draw()
|
||||
self.fig.canvas.flush_events()
|
||||
plt.pause(0.01)
|
||||
|
||||
def close(self):
|
||||
"""Закрывает окно"""
|
||||
plt.close(self.fig)
|
||||
|
||||
def show_final(self):
|
||||
"""Показывает финальное состояние окна"""
|
||||
plt.ioff()
|
||||
print("Симуляция завершена. Окно визуализации остается открытым для анализа.")
|
||||
plt.pause(100000)
|
||||
|
||||
def pause(self, duration: float):
|
||||
plt.pause(duration)
|
||||
164
position.py
Normal file
164
position.py
Normal file
@@ -0,0 +1,164 @@
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
from numpy.linalg import inv
|
||||
from typing import Optional
|
||||
import constants
|
||||
from scipy.spatial.transform import Rotation
|
||||
import utility
|
||||
|
||||
class Position:
|
||||
"""Класс позиции с полной ориентацией БПЛА в 3D пространстве"""
|
||||
|
||||
x: float # Координата X
|
||||
y: float # Координата Y
|
||||
z: float # Масштаб
|
||||
yaw: float # Рыскание (rotation around Z-axis)
|
||||
pitch: float # Тангаж (rotation around Y-axis)
|
||||
roll: float # Крен (rotation around X-axis)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
x: float = 0,
|
||||
y: float = 0,
|
||||
z: float = 1,
|
||||
yaw: float = 0,
|
||||
pitch: float = 0,
|
||||
roll: float = 0
|
||||
):
|
||||
self.x = x
|
||||
self.y = y
|
||||
self.z = z
|
||||
self.yaw = yaw
|
||||
self.pitch = pitch
|
||||
self.roll = roll
|
||||
|
||||
def __str__(self) -> str:
|
||||
return (
|
||||
f"Position(x={self.x:.2f}, y={self.y:.2f}, z={self.z:.2f}, "
|
||||
f"yaw={math.degrees(self.yaw):.1f}°, "
|
||||
f"pitch={math.degrees(self.pitch):.1f}°, "
|
||||
f"roll={math.degrees(self.roll):.1f}°)"
|
||||
)
|
||||
|
||||
def __imul__(self, scalar: float):
|
||||
self.x *= scalar
|
||||
self.y *= scalar
|
||||
self.z *= scalar
|
||||
return self
|
||||
|
||||
def __mul__(self, scalar: float) -> 'Position':
|
||||
pos = self.copy()
|
||||
pos *= scalar
|
||||
return pos
|
||||
|
||||
def __itruediv__(self, scalar: float):
|
||||
self.x /= scalar
|
||||
self.y /= scalar
|
||||
self.z /= scalar
|
||||
return self
|
||||
|
||||
def __truediv__(self, scalar: float) -> 'Position':
|
||||
pos = self.copy()
|
||||
pos /= scalar
|
||||
return pos
|
||||
|
||||
def get_homography_matrix(self, K_in: np.ndarray = constants.K, K_out: np.ndarray | None = None, sliding: bool = True) -> np.ndarray:
|
||||
""" Возвращает матрицу гомографии """
|
||||
R = self.get_rotation_matrix()
|
||||
T = self.get_translation_matrix(K_in)
|
||||
if not sliding:
|
||||
T[0, 2] = T[1, 2] = 0
|
||||
if K_out is None: K_out = K_in
|
||||
return K_out @ R @ T @ np.linalg.inv(K_in)
|
||||
|
||||
def copy(self) -> 'Position':
|
||||
"""Создает полную копию объекта"""
|
||||
return Position(self.x, self.y, self.z, self.yaw, self.pitch, self.roll)
|
||||
|
||||
def get_translation_matrix(self, K: np.ndarray = constants.K) -> np.ndarray:
|
||||
return np.array([
|
||||
[1, 0, self.x / K[0][0]],
|
||||
[0, 1, self.y / K[0][0]],
|
||||
[0, 0, self.z]
|
||||
])
|
||||
|
||||
def get_rotation_matrix(self) -> np.ndarray:
|
||||
"""
|
||||
Матрица вращения с порядком применения: yaw → pitch → roll
|
||||
Умножение: R = Rx(roll) * Ry(pitch) * Rz(yaw)
|
||||
"""
|
||||
cy, sy = math.cos(self.yaw), math.sin(self.yaw)
|
||||
cp, sp = math.cos(self.pitch), math.sin(self.pitch)
|
||||
cr, sr = math.cos(self.roll), math.sin(self.roll)
|
||||
|
||||
Rz = np.array([
|
||||
[cy, -sy, 0],
|
||||
[sy, cy, 0],
|
||||
[0, 0, 1],
|
||||
])
|
||||
|
||||
Ry = np.array([
|
||||
[cp, 0, sp],
|
||||
[0, 1, 0],
|
||||
[-sp, 0, cp],
|
||||
])
|
||||
|
||||
Rx = np.array([
|
||||
[1, 0, 0],
|
||||
[0, cr, -sr],
|
||||
[0, sr, cr],
|
||||
])
|
||||
|
||||
return Rx @ Ry @ Rz
|
||||
|
||||
def iapply(self, homography_matrix: np.ndarray, K = constants.K) -> 'Position':
|
||||
"""Применяет матрицу трансформации для вычисления новой позиции и ориентации."""
|
||||
|
||||
np.set_printoptions(suppress=True)
|
||||
H = homography_matrix @ self.get_homography_matrix(sliding=False)
|
||||
|
||||
# Decompose homography
|
||||
_, R, t, _ = cv2.decomposeHomographyMat(H, K)
|
||||
R = np.array(R)
|
||||
t = np.array(t)
|
||||
|
||||
# print(cv2.decomposeHomographyMat(inv(H), K))
|
||||
# _, _, z, _ = cv2.decomposeHomographyMat(inv(H), K)
|
||||
# print(z)
|
||||
# z = z.copy()
|
||||
# z *= constants._K_FOCUS_DISTANCE
|
||||
# print(z)
|
||||
|
||||
T = inv(R) @ inv(K) @ H @ K
|
||||
ind = np.array([A[2][0] ** 2 + A[2][1] ** 2 for A in T])
|
||||
top_k = max(1, len(T) // 2)
|
||||
if (len(T) == 3): raise "len(T) == 3"
|
||||
ind = np.argpartition(ind, top_k - 1)[:top_k]
|
||||
T = T[ind[0]]
|
||||
T = T @ np.array([0, 0, 1]) / np.mean((T[0][0], T[1][1]))
|
||||
T[2] -= 1
|
||||
R = R[ind]
|
||||
t = t[ind]
|
||||
|
||||
best_id = ((t - T) ** 2).sum((1, 2)).argmin()
|
||||
|
||||
R = R[best_id]
|
||||
rot = Rotation.from_matrix(R).as_euler('XYZ').flatten()
|
||||
self.roll = min(np.radians(5), max(np.radians(-5), rot[0]))
|
||||
self.pitch = min(np.radians(5), max(np.radians(-5), rot[1]))
|
||||
self.yaw = rot[2]
|
||||
|
||||
t = t[best_id].flatten()
|
||||
self.x -= T[0] * constants._K_FOCUS_DISTANCE
|
||||
self.y += T[1] * constants._K_FOCUS_DISTANCE
|
||||
self.z = max(0.7, min(1.3, 1 + T[2]))
|
||||
T[0] *= constants._K_FOCUS_DISTANCE
|
||||
T[1] *= constants._K_FOCUS_DISTANCE
|
||||
|
||||
def apply(self, homography_matrix: np.ndarray, K = constants.K) -> 'Position':
|
||||
"""Применяет матрицу трансформации для вычисления новой позиции и ориентации."""
|
||||
pos = self.copy()
|
||||
pos.iapply(homography_matrix, K)
|
||||
return pos
|
||||
166
simulator.py
166
simulator.py
@@ -1,113 +1,99 @@
|
||||
import math
|
||||
import os
|
||||
from io import BytesIO
|
||||
from time import sleep
|
||||
|
||||
from PIL import Image
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
|
||||
from autopilot import Pilot
|
||||
from geolocation import Geolocation
|
||||
import cv2
|
||||
import numpy as np
|
||||
from position import Position
|
||||
from vision_chunk import VisionChunk
|
||||
from visualization import VisualizationManager, SimMode
|
||||
from yandex_map import YandexMap
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import os
|
||||
from google_map import GoogleMap
|
||||
import constants
|
||||
import utility
|
||||
|
||||
class Simulator:
|
||||
geo: Geolocation
|
||||
yandexMap: YandexMap
|
||||
def __init__(self, online_map: YandexMap | GoogleMap = None):
|
||||
self.online_map = online_map
|
||||
# Используем новый конструктор с yaw, pitch, roll
|
||||
self.pos = Position(x=0, y=0, z=1, yaw=0, pitch=0, roll=0)
|
||||
|
||||
# Менеджер визуализации
|
||||
viz_manager: VisualizationManager
|
||||
|
||||
def __init__(self, yandexMap: YandexMap = None):
|
||||
self.yandexMap = yandexMap
|
||||
|
||||
# Инициализация переменных для отслеживания траектории
|
||||
self.geo = Geolocation(0, 0, 1, 0)
|
||||
|
||||
# Создаем папку для изображений, если её нет
|
||||
os.makedirs('./images', exist_ok=True)
|
||||
|
||||
def rotate_image_like_drone(self, image: Image.Image, angle: float) -> Image.Image:
|
||||
"""
|
||||
Поворачивает картинку как будто съемка ведется с летящего дрона.
|
||||
Выделяет концентрический квадрат, поворачивает его и извлекает результат.
|
||||
"""
|
||||
# Получаем размеры изображения
|
||||
width, height = image.size
|
||||
square_size = min(width, height)
|
||||
off_x = (width - square_size) / 2
|
||||
off_y = (height - square_size) / 2
|
||||
def set_pitch(self, pitch_deg: float):
|
||||
"""Установить тангаж камеры (градусы, -10 до 10)"""
|
||||
clamped_pitch = max(-10, min(10, pitch_deg))
|
||||
self.pos.pitch = math.radians(clamped_pitch)
|
||||
|
||||
cropped_image = image.crop((off_x, off_y, off_x + square_size, off_y + square_size))
|
||||
cropped_image = cropped_image.rotate(angle / math.pi * 180, expand=True)
|
||||
|
||||
# Определяем размер концентрического квадрата (80% от минимальной стороны)
|
||||
local_square_size = int(square_size / 2 ** 0.5)
|
||||
|
||||
# Вычисляем координаты для центрирования квадрата
|
||||
left = (cropped_image.width - local_square_size) // 2
|
||||
top = (cropped_image.height - local_square_size) // 2
|
||||
right = left + local_square_size
|
||||
bottom = top + local_square_size
|
||||
|
||||
# Вырезаем концентрический квадрат
|
||||
final_image = cropped_image.crop((left, top, right, bottom))
|
||||
def set_roll(self, roll_deg: float):
|
||||
"""Установить крен камеры (градусы, -10 до 10)"""
|
||||
clamped_roll = max(-10, min(10, roll_deg))
|
||||
self.pos.roll = math.radians(clamped_roll)
|
||||
|
||||
return final_image
|
||||
def _apply_perspective_transform(self, image: Image.Image) -> Image.Image:
|
||||
"""
|
||||
Применяет перспективную трансформацию к изображению.
|
||||
Возвращает квадратное изображение 700x700.
|
||||
"""
|
||||
img_array = np.array(image)
|
||||
h, w, _ = img_array.shape
|
||||
|
||||
# Применяем трансформацию
|
||||
pos = self.pos.copy()
|
||||
pos.x = 0
|
||||
pos.y = 0
|
||||
|
||||
K_in = utility.calc_camera_matrix(w, h)
|
||||
K_out = constants.K
|
||||
H = pos.get_homography_matrix(K_in, K_out)
|
||||
|
||||
shape = (constants.CHUNK_WIDTH, constants.CHUNK_WIDTH)
|
||||
transformed = cv2.warpPerspective(img_array, H, shape)
|
||||
|
||||
return Image.fromarray(transformed)
|
||||
|
||||
def update_trajectory(self, dx: float, dy: float):
|
||||
"""
|
||||
Обновляет траекторию полета беспилотника
|
||||
"""
|
||||
# Обновляем текущие координаты
|
||||
self.geo.x += dx * self.geo.z
|
||||
self.geo.y += dy * self.geo.z
|
||||
|
||||
def update_map(self):
|
||||
"""
|
||||
Обновляет карту траектории полета
|
||||
"""
|
||||
self.viz_manager.update_global_map(self.geo.x, self.geo.y, self.mode)
|
||||
"""Обновляет координаты дрона"""
|
||||
self.pos.x += dx
|
||||
self.pos.y += dy
|
||||
|
||||
def handle(self, dangle: float, velocity: float = 50) -> None:
|
||||
""" Сдвиг камеры """
|
||||
html = self.yandexMap.driver.find_element(By.TAG_NAME, 'html')
|
||||
"""
|
||||
Управление движением дрона.
|
||||
dangle - изменение угла курса (радианы)
|
||||
velocity - скорость движения
|
||||
"""
|
||||
|
||||
action = ActionChains(self.yandexMap.driver)
|
||||
action.move_to_element_with_offset(html, 200, 200)
|
||||
action.click_and_hold()
|
||||
|
||||
self.geo.angle += dangle
|
||||
# print(f" [Simulator] angle: {self.angle / math.pi * 180:.1f}°")
|
||||
# Обновляем yaw в объекте Position
|
||||
self.pos.yaw += dangle
|
||||
velocity = max(velocity, 10)
|
||||
dx = math.cos(math.pi / 2 + self.geo.angle) * velocity / self.geo.z
|
||||
dy = math.sin(math.pi / 2 + self.geo.angle) * velocity / self.geo.z
|
||||
# print(" [Simulator] dx, dy:", [dx, dy])
|
||||
|
||||
# Вычисляем смещение на основе текущего yaw
|
||||
dx = int(math.cos(math.pi / 2 + self.pos.yaw) * velocity)
|
||||
dy = int(math.sin(math.pi / 2 + self.pos.yaw) * velocity)
|
||||
|
||||
self.update_trajectory(dx, dy)
|
||||
action.move_by_offset(-dx, dy)
|
||||
action.release()
|
||||
action.perform()
|
||||
# print(f" [Simulator] Position: {self.current_x}, {self.current_y}")
|
||||
|
||||
def change_zoom(self, direction: str = 'down'):
|
||||
""" Изменение масштаба """
|
||||
self.yandexMap.scroll(0.5, 0.5, 2, direction == 'down')
|
||||
sleep(0.4)
|
||||
if direction == 'down':
|
||||
self.geo.z /= 2
|
||||
else:
|
||||
self.geo.z *= 2
|
||||
self.online_map.move(dx, dy)
|
||||
|
||||
def set_zoom(self, zoom_level: float):
|
||||
"""Программное изменение масштаба"""
|
||||
self.pos.z = zoom_level
|
||||
|
||||
def get_chunk(self) -> VisionChunk:
|
||||
png = self.yandexMap.driver.get_screenshot_as_png()
|
||||
im = Image.open(BytesIO(png))
|
||||
"""Получить текущий снимок с камеры дрона"""
|
||||
im = self.online_map.make_screenshot()
|
||||
|
||||
# Применяем поворот как будто съемка с дрона
|
||||
rotated_im = self.rotate_image_like_drone(im, -self.geo.angle)
|
||||
return VisionChunk(rotated_im)
|
||||
# Применяем перспективную трансформацию
|
||||
transformed_im = self._apply_perspective_transform(im)
|
||||
|
||||
return VisionChunk(transformed_im)
|
||||
|
||||
def get_orientation_info(self) -> dict:
|
||||
"""Получить полную информацию об ориентации дрона"""
|
||||
return {
|
||||
'position': (self.pos.x, self.pos.y, self.pos.z),
|
||||
'yaw_deg': math.degrees(self.pos.yaw),
|
||||
'pitch_deg': math.degrees(self.pos.pitch),
|
||||
'roll_deg': math.degrees(self.pos.roll),
|
||||
'rotation_matrix': self.pos.get_rotation_matrix()
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
189
test_chunks.ipynb
Normal file
189
test_chunks.ipynb
Normal file
File diff suppressed because one or more lines are too long
1004
test_projection.ipynb
Normal file
1004
test_projection.ipynb
Normal file
File diff suppressed because one or more lines are too long
6
timer.py
6
timer.py
@@ -29,3 +29,9 @@ class Timer:
|
||||
self.elapsed = 0.
|
||||
self.enabled = False
|
||||
self.last_enabled = 0.
|
||||
|
||||
def loop(self) -> float:
|
||||
v = self.get_diff()
|
||||
self.stop()
|
||||
self.start()
|
||||
return v
|
||||
24
todo.md
24
todo.md
@@ -2,16 +2,16 @@
|
||||
[!] Проверка корректности выявления ориентира на кадре
|
||||
[!] Исправление коррекции координат на основе сопоставления с ориентиром
|
||||
|
||||
[-] FPS счетчик
|
||||
| [-] Оптимизация детекции точек
|
||||
[-] Оформление статистики при тестовых запусках
|
||||
[-] Проведение тестовых запусков
|
||||
[-] Оформление отчета
|
||||
[-] Эксперименты с разными детекторами (SIFT, KAZE)
|
||||
[+] FPS счетчик
|
||||
| [+] Оптимизация детекции точек
|
||||
[+] Оформление статистики при тестовых запусках
|
||||
[+] Проведение тестовых запусков
|
||||
[+] Оформление отчета
|
||||
[+] Эксперименты с разными детекторами (SIFT, KAZE)
|
||||
|
||||
[?] Изменение масштаба во время полёта, обработка этой трансформации
|
||||
[?] Поворот ориентиров
|
||||
[?] Ограничение выбора точек при построении маршрута, чтобы ориентиры полностью попадали в кадр
|
||||
[+] Изменение масштаба во время полёта, обработка этой трансформации
|
||||
[+] Поворот ориентиров
|
||||
[+] Ограничение выбора точек при построении маршрута, чтобы ориентиры полностью попадали в кадр
|
||||
|
||||
[+] График межкадрового смещения
|
||||
| [+] График межкадровых смещениях по версии матрицы гомографии
|
||||
@@ -19,6 +19,6 @@
|
||||
| [+] Исследовать причину погрешности при развороте
|
||||
[+] Устранение большой погрешности при повороте
|
||||
|
||||
[ ] Переделать ключевые точки -> Optical Flow
|
||||
[ ] Добавить перспективу
|
||||
[ ] Эталоны на Google Maps, полёт тот же
|
||||
[+] Переделать ключевые точки -> Optical Flow
|
||||
[+] Добавить перспективу
|
||||
[+] Эталоны на Google Maps, полёт тот же
|
||||
|
||||
156
utility.py
156
utility.py
@@ -1,6 +1,11 @@
|
||||
from PIL import Image
|
||||
from datetime import datetime
|
||||
from urllib.parse import parse_qs, urlparse, unquote
|
||||
|
||||
import constants
|
||||
import cv2
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
def cv2_to_pil(cv_image: np.ndarray) -> Image.Image:
|
||||
"""
|
||||
@@ -12,4 +17,153 @@ def cv2_to_pil(cv_image: np.ndarray) -> Image.Image:
|
||||
|
||||
def image_to_numpy(self, image: Image.Image) -> np.ndarray:
|
||||
"""Конвертирует PIL Image в numpy array для OpenCV"""
|
||||
return np.array(image)
|
||||
return np.array(image)
|
||||
|
||||
def get_normals(H: np.ndarray, R: np.ndarray, T: np.ndarray) -> np.ndarray:
|
||||
n = cv2.decomposeHomographyMat(H, constants.K, R, T)
|
||||
return n
|
||||
|
||||
def estimate_transformation_matrix(src_pts: np.ndarray, dst_pts: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Оценивает матрицу трансформации на основе сопоставленных точек"""
|
||||
# Используем RANSAC для оценки матрицы гомографии
|
||||
return cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0, maxIters=1000)
|
||||
|
||||
def calc_camera_matrix(w: float, h: float):
|
||||
f = constants._K_FOCUS_DISTANCE
|
||||
return np.array([
|
||||
[f, 0, w / 2],
|
||||
[0, f, h / 2],
|
||||
[0, 0, 1]
|
||||
])
|
||||
|
||||
|
||||
def generate_folder_name():
|
||||
"""
|
||||
Генерирует название для папки с текущей датой и временем до секунд.
|
||||
Формат: YYYY-MM-DD_HH-MM-SS
|
||||
"""
|
||||
now = datetime.now()
|
||||
folder_name = now.strftime("%Y-%m-%d_%H-%M-%S")
|
||||
return folder_name
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def parse_yandex_maps_url(url):
|
||||
"""
|
||||
Парсит URL Яндекс.Карт и извлекает lat, lon и zoom
|
||||
Формат: ?ll=lon,lat&z=zoom
|
||||
"""
|
||||
# Декодируем URL (на случай %2C вместо запятых)
|
||||
url = unquote(url)
|
||||
|
||||
# Парсим URL
|
||||
parsed_url = urlparse(url)
|
||||
params = parse_qs(parsed_url.query)
|
||||
|
||||
if 'll' in params and 'z' in params:
|
||||
# ll содержит "lon,lat"
|
||||
ll_value = params['ll'][0]
|
||||
lat, lon = map(float, ll_value.split(','))
|
||||
zoom = int(params['z'][0])
|
||||
|
||||
return {
|
||||
'lat': lat,
|
||||
'lon': lon,
|
||||
'zoom': zoom
|
||||
}
|
||||
return None
|
||||
|
||||
def parse_google_maps_url(url):
|
||||
"""
|
||||
Парсит URL Google Maps и извлекает lat, lon и zoom
|
||||
Формат: /@lat,lon,zoom[m|z]
|
||||
"""
|
||||
pattern = r'/@([-\d.]+),([-\d.]+),(\d+)([mz])'
|
||||
match = re.search(pattern, url)
|
||||
|
||||
if match:
|
||||
lon = float(match.group(1))
|
||||
lat = float(match.group(2))
|
||||
zoom_value = int(match.group(3))
|
||||
zoom_unit = match.group(4)
|
||||
|
||||
return {
|
||||
'lat': lat,
|
||||
'lon': lon,
|
||||
'zoom': zoom_value,
|
||||
'zoom_unit': zoom_unit
|
||||
}
|
||||
return None
|
||||
|
||||
def google_map_js_move_script(dx, dy) -> str:
|
||||
return """
|
||||
async function sleep(ms) {
|
||||
return new Promise((resolve, reject) => {
|
||||
setTimeout(() => resolve(), ms);
|
||||
});
|
||||
}
|
||||
|
||||
async function simulateDrag(element, offsetX, offsetY) {
|
||||
const rect = element.getBoundingClientRect();
|
||||
const startX = rect.left + rect.width / 2;
|
||||
const startY = rect.top + rect.height / 2;
|
||||
const step = 10
|
||||
const endX = startX + offsetX + step;
|
||||
const endY = startY + offsetY + step;
|
||||
|
||||
element.dispatchEvent(new MouseEvent('mousedown', {
|
||||
view: window,
|
||||
bubbles: true,
|
||||
cancelable: true,
|
||||
clientX: startX,
|
||||
clientY: startY,
|
||||
button: 0
|
||||
}));
|
||||
|
||||
let currentX = startX;
|
||||
let currentY = startY;
|
||||
|
||||
function move(stepX, stepY) {
|
||||
currentX += stepX;
|
||||
currentY += stepY;
|
||||
|
||||
document.dispatchEvent(new MouseEvent('mousemove', {
|
||||
view: window,
|
||||
bubbles: true,
|
||||
cancelable: false,
|
||||
clientX: currentX,
|
||||
clientY: currentY
|
||||
}));
|
||||
}
|
||||
|
||||
await sleep(50);
|
||||
move(step, step)
|
||||
|
||||
while (currentX != endX || currentY != endY) {
|
||||
await sleep(50);
|
||||
const stepX = Math.min(step, Math.max(-step, endX - currentX));
|
||||
const stepY = Math.min(step, Math.max(-step, endY - currentY));
|
||||
move(stepX, stepY);
|
||||
}
|
||||
|
||||
await sleep(50)
|
||||
document.dispatchEvent(new MouseEvent('mouseup', {
|
||||
view: window,
|
||||
bubbles: true,
|
||||
cancelable: false,
|
||||
clientX: endX,
|
||||
clientY: endY,
|
||||
button: 0
|
||||
}));
|
||||
}
|
||||
|
||||
{
|
||||
const canvas = document.querySelector('canvas.H1VXrf.JRr1M.DnOnV');
|
||||
""" + f"simulateDrag(canvas, {int(-dx)}, {int(dy)});" + """
|
||||
}
|
||||
"""
|
||||
@@ -1,10 +1,13 @@
|
||||
import constants
|
||||
import cv2
|
||||
import json
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from position import Position
|
||||
from timer import Timer
|
||||
from typing import Literal, Optional, Tuple
|
||||
from PIL import Image
|
||||
|
||||
FeatureMethod = Literal["orb", "sift", "akaze", "brisk"]
|
||||
DEFAULT_METHOD = "orb"
|
||||
@@ -14,6 +17,7 @@ class VisionChunk:
|
||||
image: Image.Image
|
||||
feature_method: FeatureMethod = DEFAULT_METHOD
|
||||
|
||||
pos: Optional[Position] = field(default=None, init=False)
|
||||
keypoints: Optional[list] = field(default=None, init=False)
|
||||
descriptors: Optional[np.ndarray] = field(default=None, init=False)
|
||||
_detector: Optional[cv2.Feature2D] = field(default=None, init=False, repr=False)
|
||||
@@ -27,12 +31,12 @@ class VisionChunk:
|
||||
self._detector = cv2.ORB_create(
|
||||
nfeatures=1000,
|
||||
scaleFactor=1.2,
|
||||
nlevels=32,
|
||||
nlevels=16,
|
||||
edgeThreshold=31,
|
||||
firstLevel=0,
|
||||
WTA_K=2,
|
||||
patchSize=31,
|
||||
fastThreshold=20,
|
||||
fastThreshold=10,
|
||||
)
|
||||
elif self.feature_method == "sift":
|
||||
self._detector = cv2.SIFT_create(
|
||||
@@ -70,30 +74,55 @@ class VisionChunk:
|
||||
return self._matcher
|
||||
|
||||
def _preprocess(self, img_np: np.ndarray) -> np.ndarray:
|
||||
"""CLAHE предобработка для улучшения контраста"""
|
||||
"""Предобработка для улучшения сопоставления между снимками разного времени"""
|
||||
if len(img_np.shape) == 3:
|
||||
gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
|
||||
else:
|
||||
gray = img_np
|
||||
|
||||
|
||||
# Гауссовское размытие для подавления шума и мелких различий
|
||||
# blurred = cv2.GaussianBlur(gray, (5, 5), 1.0)
|
||||
|
||||
# CLAHE для выравнивания контраста между снимками
|
||||
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
||||
return clahe.apply(gray)
|
||||
enhanced = clahe.apply(gray)
|
||||
|
||||
# Опционально: нормализация гистограммы для устранения различий в освещении
|
||||
normalized = cv2.normalize(enhanced, None, 0, 255, cv2.NORM_MINMAX)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def compute_keypoints(self, force: bool = False) -> Tuple[list[cv2.KeyPoint], Optional[np.ndarray]]:
|
||||
if self.keypoints is not None and self.descriptors is not None and not force:
|
||||
return self.keypoints, self.descriptors
|
||||
|
||||
timer = Timer()
|
||||
timer.start()
|
||||
detector = self._get_detector()
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-DETECTION]: get_detector: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
# PIL -> OpenCV (RGB->BGR)
|
||||
img_np = np.array(self.image)
|
||||
if img_np.ndim == 3 and img_np.shape[2] == 3:
|
||||
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-DETECTION]: converting: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
# CLAHE предобработка
|
||||
preprocessed = self._preprocess(img_np)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-DETECTION]: preprocess: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
keypoints, descriptors = detector.detectAndCompute(preprocessed, None)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-DETECTION]: detect and compute: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
# Получаем массив response для всех точек
|
||||
responses = np.array([kp.response for kp in keypoints])
|
||||
|
||||
@@ -104,6 +133,9 @@ class VisionChunk:
|
||||
best_keypoints = [keypoints[i] for i in top_indices]
|
||||
best_descriptors = descriptors[top_indices]
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-DETECTION]: filtration: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
self.keypoints = best_keypoints
|
||||
self.descriptors = best_descriptors
|
||||
return self.keypoints, self.descriptors
|
||||
@@ -122,15 +154,29 @@ class VisionChunk:
|
||||
Возвращает: src_pts, dst_pts, good_matches, kp1, kp2 (отцентрированные координаты)
|
||||
"""
|
||||
# Вычисляем keypoints для обоих
|
||||
timer = Timer()
|
||||
timer.start()
|
||||
|
||||
kp1, des1 = self.compute_keypoints()
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-KEYPOINTS]: computing 1: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
kp2, des2 = other.compute_keypoints()
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-KEYPOINTS]: computing 2: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
if des1 is None or des2 is None or len(kp1) < 4 or len(kp2) < 4:
|
||||
return None, None, None, None, None
|
||||
|
||||
# kNN matching + Lowe ratio test
|
||||
matcher = self._get_matcher()
|
||||
matches_knn = matcher.knnMatch(des1, des2, k=2)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-KEYPOINTS]: matching: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
good_matches: list[cv2.DMatch] = []
|
||||
|
||||
for m_n in matches_knn:
|
||||
@@ -147,28 +193,22 @@ class VisionChunk:
|
||||
if len(good_matches) < 4:
|
||||
return None, None, None, None, None
|
||||
|
||||
# Центр изображений
|
||||
img1_cv = self.to_cv2_gray()
|
||||
img2_cv = other.to_cv2_gray()
|
||||
h1, w1 = img1_cv.shape
|
||||
h2, w2 = img2_cv.shape
|
||||
cx1, cy1 = w1 // 2, h1 // 2
|
||||
cx2, cy2 = w2 // 2, h2 // 2
|
||||
|
||||
# Отцентрированные координаты (x_rel, y_rel)
|
||||
src_pts = []
|
||||
dst_pts = []
|
||||
|
||||
for match in good_matches:
|
||||
pt1 = kp1[match.queryIdx].pt
|
||||
src_pts.append([pt1[0] - cx1, cy1 - pt1[1]]) # Y вверх!
|
||||
src_pts.append([pt1[0], pt1[1]])
|
||||
|
||||
pt2 = kp2[match.trainIdx].pt
|
||||
dst_pts.append([pt2[0] - cx2, cy2 - pt2[1]])
|
||||
dst_pts.append([pt2[0], pt2[1]])
|
||||
|
||||
src_pts = np.float32(src_pts).reshape(-1, 1, 2)
|
||||
dst_pts = np.float32(dst_pts).reshape(-1, 1, 2)
|
||||
|
||||
if constants.DEBUG_FPS:
|
||||
print(f"[VC-KEYPOINTS]: filtration: {timer.loop() * 1000:.2f} ms")
|
||||
|
||||
return src_pts, dst_pts, good_matches, kp1, kp2
|
||||
|
||||
def to_cv2_gray(self) -> np.ndarray:
|
||||
|
||||
@@ -3,14 +3,16 @@
|
||||
Модуль для управления общим окном визуализации
|
||||
"""
|
||||
|
||||
from PIL import Image
|
||||
from enum import Enum
|
||||
from scipy.interpolate import make_interp_spline
|
||||
|
||||
import cv2
|
||||
import matplotlib
|
||||
import matplotlib.axes
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
import numpy as np
|
||||
from enum import Enum
|
||||
import cv2
|
||||
from PIL import Image
|
||||
import matplotlib
|
||||
|
||||
# Настройки matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
@@ -93,14 +95,14 @@ class VisualizationManager:
|
||||
self.ax_matches.axis('off')
|
||||
|
||||
# Сопоставление точек (средний средний угол)
|
||||
self.ax_chunk_matches = self.fig.add_subplot(gs[1, 2])
|
||||
self.ax_chunk_matches = self.fig.add_subplot(gs[1, 1:3])
|
||||
self.ax_chunk_matches.set_title('Chunk Matching')
|
||||
self.ax_chunk_matches.axis('off')
|
||||
|
||||
# Визуализация движения ключевых точек (левый нижний угол)
|
||||
self.ax_motion_vectors = self.fig.add_subplot(gs[1, 1])
|
||||
self.ax_motion_vectors.set_title('Motion Vectors - Движение ключевых точек')
|
||||
self.ax_motion_vectors.axis('off')
|
||||
# self.ax_motion_vectors = self.fig.add_subplot(gs[1, 1])
|
||||
# self.ax_motion_vectors.set_title('Motion Vectors - Движение ключевых точек')
|
||||
# self.ax_motion_vectors.axis('off')
|
||||
|
||||
# Визуализация движения ключевых точек на основе матрицы гомографии
|
||||
self.ax_motion_gomography = self.fig.add_subplot(gs[0, 1])
|
||||
@@ -157,7 +159,7 @@ class VisualizationManager:
|
||||
# Рисуем текущую целевую точку
|
||||
if self.target_idx < len(self.target_pts):
|
||||
pt = self.target_pts[self.target_idx]
|
||||
self.ax_global_map.plot(pt[0], pt[1], 'yo', markersize=8, label='Цель (0, 0)')
|
||||
self.ax_global_map.plot(pt[0], pt[1], 'yo', markersize=8, label='Цель')
|
||||
|
||||
self.ax_global_map.legend()
|
||||
|
||||
@@ -208,7 +210,37 @@ class VisualizationManager:
|
||||
self.ax_error_plot.grid(True, alpha=0.3)
|
||||
|
||||
if len(self.error_times) > 1:
|
||||
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-', linewidth=2)
|
||||
# Оригинальный график (более прозрачный)
|
||||
self.ax_error_plot.plot(self.error_times, self.position_errors, 'b-',
|
||||
linewidth=1, alpha=0.4, label='Погрешность данных')
|
||||
|
||||
if len(self.error_times) > 5:
|
||||
# Сглаженный график
|
||||
smoothed_times = np.linspace(self.error_times[0], self.error_times[-1], 300)
|
||||
spl = make_interp_spline(self.error_times, self.position_errors, k=3)
|
||||
smoothed_errors = spl(smoothed_times)
|
||||
|
||||
|
||||
self.ax_error_plot.plot(smoothed_times, smoothed_errors, 'orange',
|
||||
linewidth=2, label='Сглаженный тренд')
|
||||
|
||||
# if len(self.position_errors) > 5: # Достаточно данных для сглаживания
|
||||
# window_size = min(11, len(self.position_errors) // 3) # Адаптивный размер окна
|
||||
# if window_size % 2 == 0: # Должен быть нечетным
|
||||
# window_size += 1
|
||||
|
||||
# # Метод скользящего среднего
|
||||
# smoothed_errors = np.convolve(
|
||||
# self.position_errors,
|
||||
# np.ones(window_size) / window_size,
|
||||
# mode='valid'
|
||||
# )
|
||||
|
||||
# # Корректируем временную ось для сглаженных данных
|
||||
# offset = (window_size - 1) // 2
|
||||
# smoothed_times = self.error_times[offset:offset + len(smoothed_errors)]
|
||||
|
||||
self.ax_error_plot.legend(loc='upper right')
|
||||
|
||||
# Автоматически масштабируем оси
|
||||
if len(self.position_errors) > 0:
|
||||
@@ -219,6 +251,7 @@ class VisualizationManager:
|
||||
else:
|
||||
self.ax_error_plot.set_ylim(0, 1)
|
||||
|
||||
|
||||
def update_matches(self, img1: np.ndarray, img2: np.ndarray,
|
||||
kp1, kp2, matches, transformation_info=None):
|
||||
"""Обновляет визуализацию сопоставления точек"""
|
||||
@@ -407,7 +440,6 @@ class VisualizationManager:
|
||||
|
||||
# Получаем размеры изображения и центр
|
||||
height, width = current_frame.shape[:2]
|
||||
center_x, center_y = width // 2, height // 2
|
||||
|
||||
# Создаем сетку точек с заданным шагом
|
||||
grid_points = []
|
||||
@@ -424,8 +456,8 @@ class VisualizationManager:
|
||||
grid_points_centered = []
|
||||
for pt in grid_points:
|
||||
# Отцентрируем координаты точно так же, как в detect_and_match_keypoints
|
||||
centered_x = pt[0] - center_x
|
||||
centered_y = center_y - pt[1] # Инвертируем Y (изображение Y направлен вниз)
|
||||
centered_x = pt[0]
|
||||
centered_y = pt[1]
|
||||
grid_points_centered.append([centered_x, centered_y])
|
||||
|
||||
grid_points_centered = np.array(grid_points_centered, dtype=np.float32)
|
||||
@@ -442,8 +474,8 @@ class VisualizationManager:
|
||||
transformed_points = []
|
||||
for pt in transformed_points_centered:
|
||||
# Обратное преобразование от центрированных координат к координатам изображения
|
||||
img_x = pt[0] + center_x
|
||||
img_y = center_y - pt[1] # Инвертируем Y обратно
|
||||
img_x = pt[0]
|
||||
img_y = pt[1] # Инвертируем Y обратно
|
||||
transformed_points.append([img_x, img_y])
|
||||
|
||||
transformed_points = np.array(transformed_points, dtype=np.float32)
|
||||
|
||||
107
yandex_map.py
107
yandex_map.py
@@ -1,32 +1,41 @@
|
||||
import math
|
||||
from io import BytesIO
|
||||
from time import sleep
|
||||
import os
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
from selenium.webdriver.common.actions.wheel_input import ScrollOrigin
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from time import sleep
|
||||
|
||||
def generateURL(lat=49.103814, lon=55.794258, zoom=10):
|
||||
import constants
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
import os
|
||||
import utility
|
||||
|
||||
def generateURL(lat: float, lon: float, zoom: int):
|
||||
return f"https://yandex.ru/maps/43/kazan/?l=sat&ll={lat}%2C{lon}&z={zoom}"
|
||||
|
||||
class YandexMap:
|
||||
def __init__(self):
|
||||
initial_zoom: int
|
||||
initial_lat: float
|
||||
initial_lon: float
|
||||
|
||||
def __init__(self, initial_lat=49.103814, initial_lon=55.794258, initial_zoom=10):
|
||||
self.initial_lat = initial_lat
|
||||
self.initial_lon = initial_lon
|
||||
self.initial_zoom = initial_zoom
|
||||
self.pixel_ratio = constants.YANDEX_PIXEL_RATIO[self.initial_zoom]
|
||||
|
||||
options = webdriver.ChromeOptions()
|
||||
# options.add_experimental_option("detach", True)
|
||||
self.driver = webdriver.Chrome(options)
|
||||
self.driver.get(generateURL())
|
||||
self.driver.get(generateURL(initial_lat, initial_lon, initial_zoom))
|
||||
self.driver.maximize_window()
|
||||
sleep(2)
|
||||
|
||||
action = ActionChains(self.driver)
|
||||
|
||||
# Закрытие левой панели
|
||||
action = ActionChains(self.driver)
|
||||
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
|
||||
action.perform()
|
||||
|
||||
@@ -35,8 +44,25 @@ class YandexMap:
|
||||
self.driver.execute_script("arguments[0].remove();", self.driver.find_element(By.XPATH, "//nav[@class='map-controls']"))
|
||||
self.driver.execute_script("arguments[0].remove();", self.driver.find_element(By.XPATH, "//footer"))
|
||||
|
||||
self.move(39, -9)
|
||||
|
||||
sleep(0.2)
|
||||
|
||||
def open(self, lat, lon, zoom):
|
||||
self.initial_lat = lat
|
||||
self.initial_lon = lon
|
||||
self.initial_zoom = zoom
|
||||
self.pixel_ratio = constants.YANDEX_PIXEL_RATIO[self.initial_zoom]
|
||||
self.driver.get(generateURL(lat, lon, zoom))
|
||||
sleep(2)
|
||||
|
||||
# Закрытие левой панели
|
||||
action = ActionChains(self.driver)
|
||||
action.click(self.driver.find_element(By.CLASS_NAME, 'sidebar-toggle-button'))
|
||||
action.perform()
|
||||
|
||||
self.move(39, -9)
|
||||
|
||||
def save_photo(self, filename: str) -> bytes:
|
||||
return self.driver.save_screenshot(filename)
|
||||
|
||||
@@ -60,51 +86,26 @@ class YandexMap:
|
||||
if i != count - 1:
|
||||
sleep(0.25)
|
||||
|
||||
def make_as_center(self, x: float, y: float):
|
||||
def move(self, dx: float, dy: float):
|
||||
html = self.driver.find_element(By.TAG_NAME, 'html')
|
||||
|
||||
action = ActionChains(self.driver)
|
||||
action.move_to_element_with_offset(html, 0, 0)
|
||||
action.click_and_hold()
|
||||
|
||||
dx = (x - 0.5) * self.get_size()[0]
|
||||
dy = (0.5 - y) * self.get_size()[1]
|
||||
print(dx, dy)
|
||||
action.move_by_offset(-dx, dy)
|
||||
action.release()
|
||||
action.perform()
|
||||
|
||||
def make_screenshot(self, x: float, y: float, width: float, height: float) -> cv2.typing.MatLike:
|
||||
# Сохраняем скриншот
|
||||
self.save_photo("temp.png")
|
||||
|
||||
# Загружаем изображение
|
||||
image = cv2.imread("temp.png")
|
||||
|
||||
if image is None:
|
||||
raise ValueError("Не удалось загрузить изображение temp.png")
|
||||
|
||||
# Получаем размеры исходного изображения
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
# Преобразуем относительные координаты в абсолютные пиксели
|
||||
center_x = int(x * img_width)
|
||||
center_y = int(y * img_height)
|
||||
crop_width = int(width * img_width)
|
||||
crop_height = int(height * img_height)
|
||||
|
||||
# Вычисляем координаты прямоугольника для кадрирования
|
||||
x1 = max(0, center_x - crop_width // 2)
|
||||
y1 = max(0, center_y - crop_height // 2)
|
||||
x2 = min(img_width, center_x + crop_width // 2)
|
||||
y2 = min(img_height, center_y + crop_height // 2)
|
||||
|
||||
# Проверяем, что прямоугольник имеет положительные размеры
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
raise ValueError("Некорректные размеры для кадрирования")
|
||||
|
||||
# Кадрируем изображение
|
||||
cropped_image = image[y1:y2, x1:x2]
|
||||
|
||||
# Если нужно вернуть изображение как результат функции:
|
||||
return cropped_image
|
||||
|
||||
def make_as_center(self, x: float, y: float):
|
||||
dx = (x - 0.5) * self.get_size()[0]
|
||||
dy = (0.5 - y) * self.get_size()[1]
|
||||
self.move(dx, dy)
|
||||
|
||||
def make_screenshot(self) -> Image.Image:
|
||||
png = self.driver.get_screenshot_as_png()
|
||||
im = Image.open(BytesIO(png))
|
||||
return utility.cv2_to_pil(np.array(im)[:, :])
|
||||
|
||||
def get_geolocation(self):
|
||||
current_url = self.driver.current_url
|
||||
return utility.parse_yandex_maps_url(current_url)
|
||||
|
||||
Reference in New Issue
Block a user