fix(isotest): Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration

This commit is contained in:
2026-01-10 09:23:30 +00:00
parent 352562b1a5
commit 45b593cd7c
2 changed files with 194 additions and 24 deletions

View File

@@ -1,5 +1,14 @@
# Changelog
## 2026-01-10 - 0.4.15 - fix(isotest)
Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration
- Add immediate-flush logging helper for clearer background output
- Wait for SPICE TCP port (wait_for_port) and parse spice:// URIs before connecting
- Add stabilization delay before sending monitor config and track retry counts
- Add verify_and_retry to confirm configuration or retry up to configurable attempts
- Detect agent disconnects (VM reboots) and keep running to reconfigure on reconnect; add reconnect and periodic health checks
## 2026-01-09 - 0.4.1 - fix(release-upload)
clear progress timer on upload completion/error and add periodic upload progress reporting

View File

@@ -7,6 +7,8 @@ Retries until the SPICE agent in the guest is connected.
import gi
import sys
import time
import socket
import re
gi.require_version('SpiceClientGLib', '2.0')
from gi.repository import SpiceClientGLib, GLib
@@ -15,6 +17,35 @@ from gi.repository import SpiceClientGLib, GLib
CHANNEL_MAIN = 1
CHANNEL_DISPLAY = 2
def log(msg):
"""Print with flush for immediate output when backgrounded"""
print(msg, flush=True)
def wait_for_port(host, port, timeout=60):
"""Wait for a TCP port to be available"""
start = time.time()
while time.time() - start < timeout:
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(1)
result = sock.connect_ex((host, port))
sock.close()
if result == 0:
return True
except:
pass
time.sleep(0.5)
return False
def parse_spice_uri(uri):
"""Parse spice://host:port URI"""
match = re.match(r'spice://([^:]+):(\d+)', uri)
if match:
return match.group(1), int(match.group(2))
return 'localhost', 5930
class SpiceDisplayEnabler:
def __init__(self, uri, num_displays=3, width=1920, height=1080, timeout=60):
self.uri = uri
@@ -29,6 +60,13 @@ class SpiceDisplayEnabler:
self.configured = False
self.agent_connected = False
self.config_sent = False
self.config_retries = 0
self.max_retries = 3
self.stabilization_scheduled = False
self.connection_retries = 0
self.max_connection_retries = 30 # Try reconnecting for up to 5 minutes
self.agent_check_count = 0
self.configure_count = 0 # Track how many times we've configured (for reboots)
def on_channel_new(self, session, channel):
"""Handle new channel creation"""
@@ -36,44 +74,66 @@ class SpiceDisplayEnabler:
channel_id = channel.get_property('channel-id')
if channel_type == CHANNEL_MAIN:
log(f"Main channel received (id={channel_id})")
self.main_channel = channel
channel.connect_after('channel-event', self.on_channel_event)
# Check agent status periodically
GLib.timeout_add(500, self.check_agent_and_configure)
elif channel_type == CHANNEL_DISPLAY:
log(f"Display channel received (id={channel_id})")
self.display_channels.append((channel_id, channel))
def on_channel_event(self, channel, event):
"""Handle channel events"""
log(f"Channel event: {event}")
if event == SpiceClientGLib.ChannelEvent.OPENED:
# Start checking for agent
GLib.timeout_add(100, self.check_agent_and_configure)
def check_agent_and_configure(self):
"""Check if agent is connected and configure if ready"""
if self.config_sent:
return False # Stop checking
if self.stabilization_scheduled:
return True # Keep checking but don't act yet
if not self.main_channel:
return True # Keep checking
was_connected = self.agent_connected
self.agent_connected = self.main_channel.get_property('agent-connected')
self.agent_check_count += 1
if self.agent_connected and not self.config_sent:
print(f"Agent connected! Configuring {self.num_displays} displays...")
self.configure_monitors()
return False # Stop checking
# Detect agent disconnect (VM reboot)
if was_connected and not self.agent_connected:
log(f"Agent disconnected (VM may be rebooting)...")
self.configured = False
self.config_sent = False
self.config_retries = 0
return True # Keep checking
# Log every 10 checks (5 seconds)
if self.agent_check_count % 10 == 0:
status = "connected" if self.agent_connected else "waiting"
log(f"Agent {status} (check #{self.agent_check_count}, configured={self.configure_count}x)")
if self.agent_connected and not self.config_sent and not self.stabilization_scheduled:
log(f"Agent connected! Waiting 2s for stabilization...")
self.stabilization_scheduled = True
# Wait 2 seconds for agent to fully initialize before configuring
GLib.timeout_add(2000, self.configure_monitors)
return True # Always keep checking for reboots
def configure_monitors(self):
"""Configure multiple monitors via SPICE protocol"""
if self.config_sent:
return
if self.configured:
return False # Already done
if not self.main_channel:
print("No main channel!")
return
log("No main channel!")
return False
self.config_retries += 1
attempt_str = f" (attempt {self.config_retries}/{self.max_retries})" if self.config_retries > 1 else ""
log(f"Configuring {self.num_displays} displays{attempt_str}...")
# Enable and configure each display
for i in range(self.num_displays):
@@ -84,19 +144,64 @@ class SpiceDisplayEnabler:
self.main_channel.update_display_enabled(i, True, False)
self.main_channel.update_display(i, x, y, self.width, self.height, False)
except Exception as e:
print(f" Error setting display {i}: {e}")
log(f" Error setting display {i}: {e}")
# Send the configuration
try:
self.main_channel.send_monitor_config()
self.config_sent = True
self.configured = True
print(f"Configured {self.num_displays} displays at {self.width}x{self.height}")
log(f"Sent config for {self.num_displays} displays at {self.width}x{self.height}")
except Exception as e:
print(f"Error sending config: {e}")
log(f"Error sending config: {e}")
# Quit after a short delay
GLib.timeout_add(1000, self.quit)
# Schedule verification/retry after 3 seconds
GLib.timeout_add(3000, self.verify_and_retry)
return False # Don't repeat this timeout
def verify_and_retry(self):
"""Verify configuration was applied, retry if needed"""
if self.configured:
return False # Already done
# Check if displays are actually enabled by re-checking agent state
if not self.main_channel:
log("Lost main channel during verification")
self.quit()
return False
# The SPICE protocol doesn't provide a direct way to verify display config
# was applied. We assume success if we sent config and agent is still connected.
agent_still_connected = self.main_channel.get_property('agent-connected')
if agent_still_connected and self.config_sent:
# Mark as configured and send again for good measure
if self.config_retries < self.max_retries:
log(f"Sending config again to ensure it takes effect...")
self.config_sent = False # Allow retry
self.configure_monitors()
else:
# We've tried enough, assume success
self.configured = True
self.configure_count += 1
self.stabilization_scheduled = False # Allow reconfiguration after reboot
log(f"Configuration complete (configured {self.configure_count}x total)")
# Don't quit - keep running to handle VM reboots
elif not agent_still_connected:
log("Agent disconnected during verification - will retry when reconnected")
self.config_sent = False
self.config_retries = 0
self.stabilization_scheduled = False
# Don't quit - agent will reconnect after reboot
else:
# Config not sent but agent connected - try again
if self.config_retries < self.max_retries:
log(f"Config not sent, retrying...")
self.configure_monitors()
else:
log(f"Failed after {self.config_retries} attempts")
self.quit()
return False # Don't repeat this timeout
def quit(self):
self.loop.quit()
@@ -105,31 +210,87 @@ class SpiceDisplayEnabler:
def on_timeout(self):
"""Handle overall timeout"""
if not self.configured:
print(f"Timeout after {self.timeout}s - agent not connected")
log(f"Timeout after {self.timeout}s - agent not connected (checks={self.agent_check_count})")
self.quit()
return False
def check_connection_health(self):
"""Check if connection is healthy, reconnect if needed"""
log(f"Health check: configured={self.configure_count}x, main_channel={self.main_channel is not None}, agent={self.agent_connected}")
# Don't stop checking - we need to handle reboots
if self.stabilization_scheduled:
return True # Keep checking but don't reconnect during stabilization
# If we don't have a main channel after 10 seconds, reconnect
if not self.main_channel:
self.connection_retries += 1
if self.connection_retries > self.max_connection_retries:
log(f"Giving up after {self.connection_retries} connection attempts")
return False
log(f"No main channel received, reconnecting (attempt {self.connection_retries})...")
self.reconnect()
return True # Keep checking
return True # Keep checking connection health
def reconnect(self):
"""Disconnect and reconnect to SPICE"""
if self.session:
try:
self.session.disconnect()
except:
pass
# Reset state for new connection
self.main_channel = None
self.display_channels = []
# Create new session
self.session = SpiceClientGLib.Session()
self.session.set_property('uri', self.uri)
self.session.connect_after('channel-new', self.on_channel_new)
if not self.session.connect():
log(" Reconnection failed, will retry...")
def run(self):
print(f"Connecting to {self.uri}...")
print(f"Waiting up to {self.timeout}s for agent...")
log(f"Connecting to {self.uri}...")
log(f"Waiting up to {self.timeout}s for agent...")
# Wait for SPICE port to be available before connecting
host, port = parse_spice_uri(self.uri)
log(f"Waiting for SPICE server at {host}:{port}...")
if not wait_for_port(host, port, timeout=60):
log(f"SPICE server not available after 60s")
return False
log(f"SPICE port {port} is open, connecting...")
# Give SPICE server a moment to fully initialize after port opens
time.sleep(1)
self.session = SpiceClientGLib.Session()
self.session.set_property('uri', self.uri)
self.session.connect_after('channel-new', self.on_channel_new)
if not self.session.connect():
print("Failed to connect to SPICE server")
return False
log("Initial connection failed, will retry...")
# Check connection health every 10 seconds
GLib.timeout_add(10000, self.check_connection_health)
# Set overall timeout
GLib.timeout_add(self.timeout * 1000, self.on_timeout)
log("Entering main loop...")
self.loop.run()
log("Main loop exited")
if self.configured:
print(f"Success: {self.num_displays} displays enabled")
log(f"Success: {self.num_displays} displays enabled")
else:
print("Failed: Could not enable displays")
log("Failed: Could not enable displays")
return self.configured