pupilometer/src/controllerSoftware/vision.py
Tempest 40b9b2c8d2 feat: Add pupil detection and camera stream to UI
- Add a new section to the web UI to display pupil detection data and a live camera stream with YOLO segmentation.
- Add a /video_feed endpoint to stream the annotated camera feed.
- Update the VisionSystem to support onnxruntime-gpu with a fallback to CPU.
- Add logging to indicate which backend is being used.
- Refactor the test suite to accommodate the new features and fix existing tests.
2025-11-28 08:29:17 +07:00

343 lines
12 KiB
Python

import sys
import platform
import os
import numpy as np
import cv2
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class VisionSystem:
    """
    Facade for the pupil-segmentation vision system.

    Chooses a platform-appropriate backend at construction time and
    delegates every public operation to it.
    """

    def __init__(self, config):
        self.config = config
        self._backend = self._initialize_backend()

    def _initialize_backend(self):
        """Select and construct the backend for the current environment.

        Returns:
            MockBackend when PUPILOMETER_ENV is 'test'; DeepStreamBackend
            on Linux/Windows when GStreamer initializes, otherwise
            PythonBackend; PythonBackend on macOS.

        Raises:
            NotImplementedError: on any other operating system.
        """
        # Tests always run against the mock, regardless of platform.
        if os.environ.get("PUPILOMETER_ENV") == "test":
            logging.info("PUPILOMETER_ENV is set to 'test'. Initializing Mock backend.")
            return MockBackend(self.config)

        os_name = platform.system()
        if os_name in ("Linux", "Windows"):
            logging.info(f"Operating system is {os_name}. Attempting to initialize DeepStream backend.")
            try:
                import gi
                gi.require_version('Gst', '1.0')
                from gi.repository import Gst
                Gst.init(None)
                logging.info("DeepStream (GStreamer) is available.")
                return DeepStreamBackend(self.config)
            except (ImportError, ValueError) as e:
                logging.warning(f"Could not initialize DeepStreamBackend: {e}. Falling back to PythonBackend.")
                return PythonBackend(self.config)
        if os_name == "Darwin":
            logging.info("Operating system is macOS. Initializing Python backend.")
            return PythonBackend(self.config)
        logging.error(f"Unsupported operating system: {os_name}")
        raise NotImplementedError(f"Unsupported operating system: {os_name}")

    def start(self):
        """Start the underlying backend."""
        self._backend.start()

    def stop(self):
        """Stop the underlying backend."""
        self._backend.stop()

    def get_pupil_data(self):
        """Return the latest pupil segmentation data from the backend."""
        return self._backend.get_pupil_data()

    def get_annotated_frame(self):
        """Return the latest annotated frame from the backend."""
        return self._backend.get_annotated_frame()
class MockBackend:
    """In-memory stand-in backend used when PUPILOMETER_ENV == 'test'."""

    def __init__(self, config):
        self.config = config
        logging.info("MockBackend initialized.")

    def start(self):
        # Nothing to start; just record the call.
        logging.info("MockBackend started.")

    def stop(self):
        # Nothing to release; just record the call.
        logging.info("MockBackend stopped.")

    def get_pupil_data(self):
        """Return a fixed, easily recognizable pupil-data payload."""
        logging.info("Getting pupil data from MockBackend.")
        return {
            "pupil_position": (123, 456),
            "pupil_diameter": 789,
            "info": "mock_data",
        }

    def get_annotated_frame(self):
        """Return a black 640x480 frame labelled as the mock camera feed."""
        placeholder = np.zeros((480, 640, 3), np.uint8)
        cv2.putText(placeholder, "Mock Camera Feed", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        return placeholder
class DeepStreamBackend:
"""
A class to handle pupil segmentation on Jetson/Windows using DeepStream.
"""
def __init__(self, config):
"""
Initializes the DeepStreamBackend.
Args:
config (dict): A dictionary containing configuration parameters.
"""
from deepstream_pipeline import DeepStreamPipeline
self.config = config
self.pipeline = DeepStreamPipeline(config)
logging.info("DeepStreamBackend initialized.")
def start(self):
"""
Starts the DeepStream pipeline.
"""
self.pipeline.start()
logging.info("DeepStreamBackend started.")
def stop(self):
"""
Stops the DeepStream pipeline.
"""
self.pipeline.stop()
logging.info("DeepStreamBackend stopped.")
def get_pupil_data(self):
"""
Retrieves pupil data from the DeepStream pipeline.
"""
return self.pipeline.get_data()
def get_annotated_frame(self):
"""
Retrieves the annotated frame from the DeepStream pipeline.
"""
return self.pipeline.get_annotated_frame()
class PythonBackend:
    """
    Pupil-segmentation backend for macOS (and any host without DeepStream),
    using a Basler camera via pypylon and a YOLO ONNX model via ONNX Runtime.
    """

    def __init__(self, config):
        """
        Initializes the PythonBackend and selects ONNX Runtime providers.

        Args:
            config (dict): Configuration parameters such as 'model_path'.

        Raises:
            ImportError: if onnxruntime is not installed at all.
        """
        self.config = config
        self.camera = None
        self.inference_session = None
        self.annotated_frame = None

        # onnxruntime and onnxruntime-gpu share the same import name, so a
        # single import suffices; choose execution providers from what this
        # installation actually offers. The choice is stored so start() can
        # pass it to InferenceSession — previously the GPU detection had no
        # effect on the session at all.
        import onnxruntime as ort
        self.ort = ort
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            logging.info("CUDA is available. Using onnxruntime-gpu.")
            self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
            logging.warning("onnxruntime-gpu is not available or CUDA is not configured. Falling back to onnxruntime (CPU).")
            self.providers = ['CPUExecutionProvider']
        logging.info("PythonBackend initialized.")

    def start(self):
        """
        Initializes the Basler camera and loads the ONNX model.

        Camera or model failures are logged and leave the corresponding
        attribute as None; get_pupil_data() then returns None.

        Raises:
            ImportError: if pypylon is not installed.
        """
        try:
            from pypylon import pylon
        except ImportError:
            raise ImportError("pypylon is not installed. Cannot start PythonBackend.")
        try:
            # Initialize the camera.
            self.camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
            self.camera.Open()
            # Keep only the newest frame; stale frames are useless here.
            self.camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
            logging.info("PythonBackend: Basler camera opened and started grabbing.")
        except Exception as e:
            logging.error(f"PythonBackend: Error opening Basler camera: {e}")
            self.camera = None
        try:
            # Pass the providers chosen in __init__ so the GPU is actually
            # used when onnxruntime-gpu is available.
            self.inference_session = self.ort.InferenceSession(
                self.config['model_path'], providers=self.providers
            )
            logging.info(f"PythonBackend: ONNX model loaded from {self.config['model_path']}.")
        except Exception as e:
            logging.error(f"PythonBackend: Error loading ONNX model: {e}")
            self.inference_session = None
        logging.info("PythonBackend started.")

    def stop(self):
        """
        Releases the camera resources.
        """
        if self.camera and self.camera.IsGrabbing():
            self.camera.StopGrabbing()
            logging.info("PythonBackend: Basler camera stopped grabbing.")
        if self.camera and self.camera.IsOpen():
            self.camera.Close()
            logging.info("PythonBackend: Basler camera closed.")
        logging.info("PythonBackend stopped.")

    def _postprocess_output(self, outputs, original_image_shape):
        """
        Post-processes the raw output from the YOLOv10 model.

        Args:
            outputs (list): Numpy arrays representing the model's output.
            original_image_shape (tuple): Original image shape (height, width).

        Returns:
            dict: Processed pupil data (currently placeholder values).
        """
        # TODO: Implement the actual post-processing logic.
        # This will involve non-maximum suppression (NMS) and parsing the
        # bounding boxes and segmentation masks.
        logging.info("Post-processing model output...")
        pupil_data = {
            "raw_model_output_shape": [o.shape for o in outputs],
            "pupil_position": (100, 120),  # Placeholder
            "pupil_diameter": 30,  # Placeholder
            "bounding_box": [50, 70, 150, 170]  # Placeholder [x1, y1, x2, y2]
        }
        return pupil_data

    def get_pupil_data(self):
        """
        Grabs a frame from the camera, runs inference, and returns pupil data.

        Returns:
            dict | None: processed pupil data, or None when the camera or
            model is unavailable or the grab/inference fails.
        """
        if not self.camera or not self.camera.IsGrabbing():
            logging.warning("PythonBackend: Camera not ready.")
            return None
        if not self.inference_session:
            logging.warning("PythonBackend: Inference session not ready.")
            return None
        grab_result = None
        try:
            from pypylon import pylon
            grab_result = self.camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
            if not grab_result.GrabSucceeded():
                logging.error(f"PythonBackend: Error grabbing frame: {grab_result.ErrorCode} {grab_result.ErrorDescription}")
                return None
            image = grab_result.Array
            original_shape = image.shape
            # Single-channel frames are debayered to RGB.
            # NOTE(review): COLOR_BAYER_BG2RGB assumes a BG Bayer pattern —
            # confirm against the camera's configured pixel format.
            if len(image.shape) == 2:
                image = cv2.cvtColor(image, cv2.COLOR_BAYER_BG2RGB)
            # Preprocess to the model's NCHW float32 input in [0, 1].
            input_shape = (640, 640)
            resized_image = cv2.resize(image, input_shape)
            normalized_image = resized_image.astype(np.float32) / 255.0
            transposed_image = np.transpose(normalized_image, (2, 0, 1))
            input_tensor = np.expand_dims(transposed_image, axis=0)
            # Run inference.
            input_name = self.inference_session.get_inputs()[0].name
            output_names = [o.name for o in self.inference_session.get_outputs()]
            outputs = self.inference_session.run(output_names, {input_name: input_tensor})
            # Post-process the output.
            pupil_data = self._postprocess_output(outputs, original_shape)
            # Draw the detection on a copy so the raw frame stays untouched.
            annotated_frame = image.copy()
            if pupil_data and "bounding_box" in pupil_data:
                x1, y1, x2, y2 = pupil_data["bounding_box"]
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            self.annotated_frame = annotated_frame
            return pupil_data
        except Exception as e:
            logging.error(f"PythonBackend: An error occurred during frame grabbing or inference: {e}")
            return None
        finally:
            # Always hand the buffer back to pylon, even on failure.
            if grab_result:
                grab_result.Release()

    def get_annotated_frame(self):
        """
        Returns the latest annotated frame.
        """
        return self.annotated_frame
if __name__ == '__main__':
    # Smoke-test: one capture/inference cycle with a default configuration.
    def _demo():
        config = {"camera_id": 0, "model_path": "yolov10.onnx"}
        vision_system = VisionSystem(config)
        vision_system.start()
        # A real application would poll in a loop; one sample is enough here.
        pupil_data = vision_system.get_pupil_data()
        logging.info(f"Received pupil data: {pupil_data}")
        # Display the annotated frame, if one was produced, until a key press.
        annotated_frame = vision_system.get_annotated_frame()
        if annotated_frame is not None:
            cv2.imshow("Annotated Frame", annotated_frame)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
        vision_system.stop()

    try:
        _demo()
    except NotImplementedError as e:
        logging.error(e)
    except Exception as e:
        logging.error(f"An error occurred: {e}")