From 45b593cd7c88da40c3576a709804bccd22e957c4 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Sat, 10 Jan 2026 09:23:30 +0000 Subject: [PATCH] fix(isotest): Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration --- changelog.md | 9 ++ isotest/enable-displays.py | 209 ++++++++++++++++++++++++++++++++----- 2 files changed, 194 insertions(+), 24 deletions(-) diff --git a/changelog.md b/changelog.md index 1ad23c6..01aa98a 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2026-01-10 - 0.4.15 - fix(isotest) +Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration + +- Add immediate-flush logging helper for clearer background output +- Wait for SPICE TCP port (wait_for_port) and parse spice:// URIs before connecting +- Add stabilization delay before sending monitor config and track retry counts +- Add verify_and_retry to confirm configuration or retry up to configurable attempts +- Detect agent disconnects (VM reboots) and keep running to reconfigure on reconnect; add reconnect and periodic health checks + ## 2026-01-09 - 0.4.1 - fix(release-upload) clear progress timer on upload completion/error and add periodic upload progress reporting diff --git a/isotest/enable-displays.py b/isotest/enable-displays.py index aad9a1f..69a655d 100755 --- a/isotest/enable-displays.py +++ b/isotest/enable-displays.py @@ -7,6 +7,8 @@ Retries until the SPICE agent in the guest is connected. import gi import sys import time +import socket +import re gi.require_version('SpiceClientGLib', '2.0') from gi.repository import SpiceClientGLib, GLib @@ -15,6 +17,35 @@ from gi.repository import SpiceClientGLib, GLib CHANNEL_MAIN = 1 CHANNEL_DISPLAY = 2 +def log(msg): + """Print with flush for immediate output when backgrounded""" + print(msg, flush=True) + + +def wait_for_port(host, port, timeout=60): + """Wait for a TCP port to be available""" + start = time.time() + while time.time() - start < timeout: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(1) + result = sock.connect_ex((host, port)) + sock.close() + if result == 0: + return True + except: + pass + time.sleep(0.5) + return False + + +def parse_spice_uri(uri): + """Parse spice://host:port URI""" + match = re.match(r'spice://([^:]+):(\d+)', uri) + if match: + return match.group(1), int(match.group(2)) + return 'localhost', 5930 + class SpiceDisplayEnabler: def __init__(self, uri, num_displays=3, width=1920, height=1080, timeout=60): self.uri = uri @@ -29,6 +60,13 @@ class SpiceDisplayEnabler: self.configured = False self.agent_connected = False self.config_sent = False + self.config_retries = 0 + self.max_retries = 3 + self.stabilization_scheduled = False + self.connection_retries = 0 + self.max_connection_retries = 30 # Try reconnecting for up to 5 minutes + self.agent_check_count = 0 + self.configure_count = 0 # Track how many times we've configured (for reboots) def on_channel_new(self, session, channel): """Handle new channel creation""" @@ -36,44 +74,66 @@ class SpiceDisplayEnabler: channel_id = channel.get_property('channel-id') if channel_type == CHANNEL_MAIN: + log(f"Main channel received (id={channel_id})") self.main_channel = channel channel.connect_after('channel-event', self.on_channel_event) # Check agent status periodically GLib.timeout_add(500, self.check_agent_and_configure) elif channel_type == CHANNEL_DISPLAY: + log(f"Display channel received (id={channel_id})") self.display_channels.append((channel_id, channel)) def on_channel_event(self, channel, event): """Handle channel events""" + log(f"Channel event: {event}") if event == SpiceClientGLib.ChannelEvent.OPENED: # Start checking for agent GLib.timeout_add(100, self.check_agent_and_configure) def check_agent_and_configure(self): """Check if agent is connected and configure if ready""" - if self.config_sent: - return False # Stop checking + if self.stabilization_scheduled: + return True # Keep checking but don't act yet if not self.main_channel: return True # Keep checking + was_connected = self.agent_connected self.agent_connected = self.main_channel.get_property('agent-connected') + self.agent_check_count += 1 - if self.agent_connected and not self.config_sent: - print(f"Agent connected! Configuring {self.num_displays} displays...") - self.configure_monitors() - return False # Stop checking + # Detect agent disconnect (VM reboot) + if was_connected and not self.agent_connected: + log(f"Agent disconnected (VM may be rebooting)...") + self.configured = False + self.config_sent = False + self.config_retries = 0 - return True # Keep checking + # Log every 10 checks (5 seconds) + if self.agent_check_count % 10 == 0: + status = "connected" if self.agent_connected else "waiting" + log(f"Agent {status} (check #{self.agent_check_count}, configured={self.configure_count}x)") + + if self.agent_connected and not self.config_sent and not self.stabilization_scheduled: + log(f"Agent connected! Waiting 2s for stabilization...") + self.stabilization_scheduled = True + # Wait 2 seconds for agent to fully initialize before configuring + GLib.timeout_add(2000, self.configure_monitors) + + return True # Always keep checking for reboots def configure_monitors(self): """Configure multiple monitors via SPICE protocol""" - if self.config_sent: - return + if self.configured: + return False # Already done if not self.main_channel: - print("No main channel!") - return + log("No main channel!") + return False + + self.config_retries += 1 + attempt_str = f" (attempt {self.config_retries}/{self.max_retries})" if self.config_retries > 1 else "" + log(f"Configuring {self.num_displays} displays{attempt_str}...") # Enable and configure each display for i in range(self.num_displays): @@ -84,19 +144,64 @@ class SpiceDisplayEnabler: self.main_channel.update_display_enabled(i, True, False) self.main_channel.update_display(i, x, y, self.width, self.height, False) except Exception as e: - print(f" Error setting display {i}: {e}") + log(f" Error setting display {i}: {e}") # Send the configuration try: self.main_channel.send_monitor_config() self.config_sent = True - self.configured = True - print(f"Configured {self.num_displays} displays at {self.width}x{self.height}") + log(f"Sent config for {self.num_displays} displays at {self.width}x{self.height}") except Exception as e: - print(f"Error sending config: {e}") + log(f"Error sending config: {e}") - # Quit after a short delay - GLib.timeout_add(1000, self.quit) + # Schedule verification/retry after 3 seconds + GLib.timeout_add(3000, self.verify_and_retry) + return False # Don't repeat this timeout + + def verify_and_retry(self): + """Verify configuration was applied, retry if needed""" + if self.configured: + return False # Already done + + # Check if displays are actually enabled by re-checking agent state + if not self.main_channel: + log("Lost main channel during verification") + self.quit() + return False + + # The SPICE protocol doesn't provide a direct way to verify display config + # was applied. We assume success if we sent config and agent is still connected. + agent_still_connected = self.main_channel.get_property('agent-connected') + + if agent_still_connected and self.config_sent: + # Mark as configured and send again for good measure + if self.config_retries < self.max_retries: + log(f"Sending config again to ensure it takes effect...") + self.config_sent = False # Allow retry + self.configure_monitors() + else: + # We've tried enough, assume success + self.configured = True + self.configure_count += 1 + self.stabilization_scheduled = False # Allow reconfiguration after reboot + log(f"Configuration complete (configured {self.configure_count}x total)") + # Don't quit - keep running to handle VM reboots + elif not agent_still_connected: + log("Agent disconnected during verification - will retry when reconnected") + self.config_sent = False + self.config_retries = 0 + self.stabilization_scheduled = False + # Don't quit - agent will reconnect after reboot + else: + # Config not sent but agent connected - try again + if self.config_retries < self.max_retries: + log(f"Config not sent, retrying...") + self.configure_monitors() + else: + log(f"Failed after {self.config_retries} attempts") + self.quit() + + return False # Don't repeat this timeout def quit(self): self.loop.quit() @@ -105,31 +210,87 @@ class SpiceDisplayEnabler: def on_timeout(self): """Handle overall timeout""" if not self.configured: - print(f"Timeout after {self.timeout}s - agent not connected") + log(f"Timeout after {self.timeout}s - agent not connected (checks={self.agent_check_count})") self.quit() return False + def check_connection_health(self): + """Check if connection is healthy, reconnect if needed""" + log(f"Health check: configured={self.configure_count}x, main_channel={self.main_channel is not None}, agent={self.agent_connected}") + + # Don't stop checking - we need to handle reboots + if self.stabilization_scheduled: + return True # Keep checking but don't reconnect during stabilization + + # If we don't have a main channel after 10 seconds, reconnect + if not self.main_channel: + self.connection_retries += 1 + if self.connection_retries > self.max_connection_retries: + log(f"Giving up after {self.connection_retries} connection attempts") + return False + + log(f"No main channel received, reconnecting (attempt {self.connection_retries})...") + self.reconnect() + return True # Keep checking + + return True # Keep checking connection health + + def reconnect(self): + """Disconnect and reconnect to SPICE""" + if self.session: + try: + self.session.disconnect() + except: + pass + + # Reset state for new connection + self.main_channel = None + self.display_channels = [] + + # Create new session + self.session = SpiceClientGLib.Session() + self.session.set_property('uri', self.uri) + self.session.connect_after('channel-new', self.on_channel_new) + + if not self.session.connect(): + log(" Reconnection failed, will retry...") + def run(self): - print(f"Connecting to {self.uri}...") - print(f"Waiting up to {self.timeout}s for agent...") + log(f"Connecting to {self.uri}...") + log(f"Waiting up to {self.timeout}s for agent...") + + # Wait for SPICE port to be available before connecting + host, port = parse_spice_uri(self.uri) + log(f"Waiting for SPICE server at {host}:{port}...") + if not wait_for_port(host, port, timeout=60): + log(f"SPICE server not available after 60s") + return False + log(f"SPICE port {port} is open, connecting...") + + # Give SPICE server a moment to fully initialize after port opens + time.sleep(1) self.session = SpiceClientGLib.Session() self.session.set_property('uri', self.uri) self.session.connect_after('channel-new', self.on_channel_new) if not self.session.connect(): - print("Failed to connect to SPICE server") - return False + log("Initial connection failed, will retry...") + + # Check connection health every 10 seconds + GLib.timeout_add(10000, self.check_connection_health) # Set overall timeout GLib.timeout_add(self.timeout * 1000, self.on_timeout) + log("Entering main loop...") self.loop.run() + log("Main loop exited") if self.configured: - print(f"Success: {self.num_displays} displays enabled") + log(f"Success: {self.num_displays} displays enabled") else: - print("Failed: Could not enable displays") + log("Failed: Could not enable displays") return self.configured