- Add a new section to the web UI to display pupil detection data and a live camera stream with YOLO segmentation.
- Add a /video_feed endpoint to stream the annotated camera feed.
- Update the VisionSystem to support onnxruntime-gpu with a fallback to CPU.
- Add logging to indicate which backend is being used.
- Refactor the test suite to accommodate the new features and fix existing tests.
343 lines
12 KiB
Python
343 lines
12 KiB
Python
import sys
|
|
import platform
|
|
import os
|
|
import numpy as np
|
|
import cv2
|
|
import logging
|
|
|
|
# Configure logging
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
class VisionSystem:
    """
    The main class for the vision system, responsible for pupil segmentation.

    It selects a platform-specific backend at construction time and delegates
    all work (start/stop, data and frame retrieval) to it.
    """

    def __init__(self, config):
        """
        Args:
            config (dict): Configuration parameters, passed through unchanged
                to the selected backend.
        """
        self.config = config
        self._backend = self._initialize_backend()

    def _initialize_backend(self):
        """
        Initializes the appropriate backend based on the environment and OS.

        Selection order:
          1. MockBackend when the PUPILOMETER_ENV environment variable is "test".
          2. DeepStreamBackend on Linux/Windows when GStreamer (gi) is
             importable, otherwise fall back to PythonBackend.
          3. PythonBackend on macOS (Darwin).

        Returns:
            The constructed backend instance.

        Raises:
            NotImplementedError: On any other operating system.
        """
        # Test environments must never touch real hardware.
        if os.environ.get("PUPILOMETER_ENV") == "test":
            logging.info("PUPILOMETER_ENV is set to 'test'. Initializing Mock backend.")
            return MockBackend(self.config)

        os_name = platform.system()

        if os_name in ("Linux", "Windows"):
            logging.info(f"Operating system is {os_name}. Attempting to initialize DeepStream backend.")
            try:
                import gi
                gi.require_version('Gst', '1.0')
                from gi.repository import Gst
                Gst.init(None)
                logging.info("DeepStream (GStreamer) is available.")
                return DeepStreamBackend(self.config)
            except (ImportError, ValueError) as e:
                # gi is missing, or the requested Gst version is unavailable.
                logging.warning(f"Could not initialize DeepStreamBackend: {e}. Falling back to PythonBackend.")
                return PythonBackend(self.config)
        elif os_name == "Darwin":
            logging.info("Operating system is macOS. Initializing Python backend.")
            return PythonBackend(self.config)
        else:
            logging.error(f"Unsupported operating system: {os_name}")
            raise NotImplementedError(f"Unsupported operating system: {os_name}")

    def start(self):
        """
        Starts the vision system (delegates to the backend).
        """
        self._backend.start()

    def stop(self):
        """
        Stops the vision system (delegates to the backend).
        """
        self._backend.stop()

    def get_pupil_data(self):
        """
        Returns the latest pupil segmentation data.
        """
        return self._backend.get_pupil_data()

    def get_annotated_frame(self):
        """
        Returns the latest annotated frame.
        """
        return self._backend.get_annotated_frame()
|
|
class MockBackend:
    """
    A mock backend for testing purposes.

    Returns fixed, deterministic pupil data and a placeholder frame so the
    rest of the system can be exercised without camera hardware or a model.
    """

    def __init__(self, config):
        """
        Args:
            config (dict): Configuration parameters (stored, not interpreted).
        """
        self.config = config
        logging.info("MockBackend initialized.")

    def start(self):
        """No-op start; only logs."""
        logging.info("MockBackend started.")

    def stop(self):
        """No-op stop; only logs."""
        logging.info("MockBackend stopped.")

    def get_pupil_data(self):
        """Return a fixed, recognisable pupil-data payload."""
        logging.info("Getting pupil data from MockBackend.")
        return {
            "pupil_position": (123, 456),
            "pupil_diameter": 789,
            "info": "mock_data"
        }

    def get_annotated_frame(self):
        """
        Returns a placeholder image.
        """
        # 480x640 black frame with a label so the stream is visibly mock data.
        frame = np.zeros((480, 640, 3), np.uint8)
        cv2.putText(frame, "Mock Camera Feed", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        return frame
|
|
class DeepStreamBackend:
    """
    Pupil-segmentation backend for Jetson/Windows built on DeepStream.

    A thin wrapper: every call is forwarded to a DeepStreamPipeline instance.
    """

    def __init__(self, config):
        """
        Construct the underlying DeepStream pipeline.

        Args:
            config (dict): A dictionary containing configuration parameters.
        """
        # Imported lazily so the module loads on machines without DeepStream.
        from deepstream_pipeline import DeepStreamPipeline

        self.config = config
        self.pipeline = DeepStreamPipeline(config)
        logging.info("DeepStreamBackend initialized.")

    def start(self):
        """Start the DeepStream pipeline."""
        self.pipeline.start()
        logging.info("DeepStreamBackend started.")

    def stop(self):
        """Stop the DeepStream pipeline."""
        self.pipeline.stop()
        logging.info("DeepStreamBackend stopped.")

    def get_pupil_data(self):
        """Return the latest pupil data produced by the pipeline."""
        return self.pipeline.get_data()

    def get_annotated_frame(self):
        """Return the latest annotated frame produced by the pipeline."""
        return self.pipeline.get_annotated_frame()
|
|
class PythonBackend:
    """
    A class to handle pupil segmentation on macOS using pypylon and ONNX Runtime.
    """

    def __init__(self, config):
        """
        Initializes the PythonBackend.

        Args:
            config (dict): A dictionary containing configuration parameters
                such as 'model_path'.

        Raises:
            ImportError: If onnxruntime is not installed at all.
        """
        self.config = config
        self.camera = None              # pypylon InstantCamera, created in start()
        self.inference_session = None   # ONNX Runtime session, created in start()
        self.annotated_frame = None     # last frame drawn by get_pupil_data()

        # Import onnxruntime exactly once and report which execution provider
        # will be used.  The original code re-imported the same module name in
        # an except branch as a "CPU fallback" — that second import could never
        # succeed when the first failed, crashing with an uncaught ImportError.
        try:
            import onnxruntime as ort
        except ImportError as e:
            logging.error("onnxruntime is not installed. Cannot initialize PythonBackend.")
            raise ImportError("onnxruntime is required for PythonBackend.") from e

        if 'CUDAExecutionProvider' in ort.get_available_providers():
            logging.info("CUDA is available. Using onnxruntime-gpu.")
        else:
            logging.warning("onnxruntime-gpu is not available or CUDA is not configured. Falling back to onnxruntime (CPU).")
        self.ort = ort

        logging.info("PythonBackend initialized.")

    def start(self):
        """
        Initializes the Basler camera and loads the ONNX model.

        Camera and model may fail independently; failures are logged and the
        corresponding attribute stays None so get_pupil_data() can report the
        problem instead of crashing.

        Raises:
            ImportError: If pypylon is not installed.
        """
        try:
            from pypylon import pylon
        except ImportError:
            raise ImportError("pypylon is not installed. Cannot start PythonBackend.")

        try:
            # Initialize the camera
            self.camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
            self.camera.Open()
            # Start grabbing continuously
            self.camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
            logging.info("PythonBackend: Basler camera opened and started grabbing.")
        except Exception as e:
            logging.error(f"PythonBackend: Error opening Basler camera: {e}")
            self.camera = None

        try:
            # Load the ONNX model
            self.inference_session = self.ort.InferenceSession(self.config['model_path'])
            logging.info(f"PythonBackend: ONNX model loaded from {self.config['model_path']}.")
        except Exception as e:
            logging.error(f"PythonBackend: Error loading ONNX model: {e}")
            self.inference_session = None

        logging.info("PythonBackend started.")

    def stop(self):
        """
        Releases the camera resources.
        """
        if self.camera and self.camera.IsGrabbing():
            self.camera.StopGrabbing()
            logging.info("PythonBackend: Basler camera stopped grabbing.")
        if self.camera and self.camera.IsOpen():
            self.camera.Close()
            logging.info("PythonBackend: Basler camera closed.")
        logging.info("PythonBackend stopped.")

    def _postprocess_output(self, outputs, original_image_shape):
        """
        Post-processes the raw output from the YOLOv10 model.

        Args:
            outputs (list): A list of numpy arrays representing the model's output.
            original_image_shape (tuple): The shape of the original image (height, width).

        Returns:
            dict: A dictionary containing the processed pupil data.
        """
        # TODO: Implement the actual post-processing logic.
        # This will involve non-maximum suppression (NMS) and parsing the
        # bounding boxes and segmentation masks.

        logging.info("Post-processing model output...")

        pupil_data = {
            "raw_model_output_shape": [o.shape for o in outputs],
            "pupil_position": (100, 120),  # Placeholder
            "pupil_diameter": 30,  # Placeholder
            "bounding_box": [50, 70, 150, 170]  # Placeholder [x1, y1, x2, y2]
        }
        return pupil_data

    def get_pupil_data(self):
        """
        Grabs a frame from the camera, runs inference, and returns pupil data.

        Returns:
            dict | None: Processed pupil data, or None if the camera/model is
            not ready or grabbing/inference fails.
        """
        if not self.camera or not self.camera.IsGrabbing():
            logging.warning("PythonBackend: Camera not ready.")
            return None

        if not self.inference_session:
            logging.warning("PythonBackend: Inference session not ready.")
            return None

        grab_result = None
        try:
            # cv2 and numpy are already imported at module level; only the
            # optional pypylon dependency is imported locally.
            from pypylon import pylon

            grab_result = self.camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)
            if grab_result.GrabSucceeded():
                image = grab_result.Array
                original_shape = image.shape

                # Image preprocessing: a single-channel frame is treated as raw
                # Bayer data — assumes BG pattern; TODO confirm against the
                # camera's configured pixel format.
                if len(image.shape) == 2:
                    image = cv2.cvtColor(image, cv2.COLOR_BAYER_BG2RGB)

                # Resize, scale to [0, 1] and reorder to NCHW for the model.
                input_shape = (640, 640)
                resized_image = cv2.resize(image, input_shape)
                normalized_image = resized_image.astype(np.float32) / 255.0
                transposed_image = np.transpose(normalized_image, (2, 0, 1))
                input_tensor = np.expand_dims(transposed_image, axis=0)

                # Run inference
                input_name = self.inference_session.get_inputs()[0].name
                output_names = [o.name for o in self.inference_session.get_outputs()]
                outputs = self.inference_session.run(output_names, {input_name: input_tensor})

                # Post-process the output
                pupil_data = self._postprocess_output(outputs, original_shape)

                # Draw segmentation on the frame
                annotated_frame = image.copy()
                if pupil_data and "bounding_box" in pupil_data:
                    x1, y1, x2, y2 = pupil_data["bounding_box"]
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                self.annotated_frame = annotated_frame

                return pupil_data
            else:
                logging.error(f"PythonBackend: Error grabbing frame: {grab_result.ErrorCode} {grab_result.ErrorDescription}")
                return None
        except Exception as e:
            logging.error(f"PythonBackend: An error occurred during frame grabbing or inference: {e}")
            return None
        finally:
            # Always hand the grab result back to pypylon's buffer pool.
            if grab_result:
                grab_result.Release()

    def get_annotated_frame(self):
        """
        Returns the latest annotated frame.
        """
        return self.annotated_frame
|
|
if __name__ == '__main__':
    # Example usage: run one grab/inference cycle and show the result.
    config = {"camera_id": 0, "model_path": "yolov10.onnx"}

    try:
        vs = VisionSystem(config)
        vs.start()

        # In a real application, this would run in a loop
        pupil_data = vs.get_pupil_data()
        logging.info(f"Received pupil data: {pupil_data}")

        # Get and show the annotated frame
        frame = vs.get_annotated_frame()
        if frame is not None:
            cv2.imshow("Annotated Frame", frame)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        vs.stop()

    # NotImplementedError must come first: it is a subclass of Exception.
    except NotImplementedError as e:
        logging.error(e)
    except Exception as e:
        logging.error(f"An error occurred: {e}")