fix(isotest): Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration
This commit is contained in:
@@ -1,5 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-01-10 - 0.4.15 - fix(isotest)
|
||||
Improve robustness of SPICE display enabler: add logging, wait-for-port and URI parsing, retries and reconnection logic, stabilization delay before configuring, and verification/retry of monitor configuration
|
||||
|
||||
- Add immediate-flush logging helper for clearer background output
|
||||
- Wait for SPICE TCP port (wait_for_port) and parse spice:// URIs before connecting
|
||||
- Add stabilization delay before sending monitor config and track retry counts
|
||||
- Add verify_and_retry to confirm configuration or retry up to configurable attempts
|
||||
- Detect agent disconnects (VM reboots) and keep running to reconfigure on reconnect; add reconnect and periodic health checks
|
||||
|
||||
## 2026-01-09 - 0.4.1 - fix(release-upload)
|
||||
clear progress timer on upload completion/error and add periodic upload progress reporting
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@ Retries until the SPICE agent in the guest is connected.
|
||||
import gi
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import re
|
||||
|
||||
gi.require_version('SpiceClientGLib', '2.0')
|
||||
from gi.repository import SpiceClientGLib, GLib
|
||||
@@ -15,6 +17,35 @@ from gi.repository import SpiceClientGLib, GLib
|
||||
CHANNEL_MAIN = 1
|
||||
CHANNEL_DISPLAY = 2
|
||||
|
||||
def log(msg):
|
||||
"""Print with flush for immediate output when backgrounded"""
|
||||
print(msg, flush=True)
|
||||
|
||||
|
||||
def wait_for_port(host, port, timeout=60):
|
||||
"""Wait for a TCP port to be available"""
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
try:
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.settimeout(1)
|
||||
result = sock.connect_ex((host, port))
|
||||
sock.close()
|
||||
if result == 0:
|
||||
return True
|
||||
except:
|
||||
pass
|
||||
time.sleep(0.5)
|
||||
return False
|
||||
|
||||
|
||||
def parse_spice_uri(uri):
|
||||
"""Parse spice://host:port URI"""
|
||||
match = re.match(r'spice://([^:]+):(\d+)', uri)
|
||||
if match:
|
||||
return match.group(1), int(match.group(2))
|
||||
return 'localhost', 5930
|
||||
|
||||
class SpiceDisplayEnabler:
|
||||
def __init__(self, uri, num_displays=3, width=1920, height=1080, timeout=60):
|
||||
self.uri = uri
|
||||
@@ -29,6 +60,13 @@ class SpiceDisplayEnabler:
|
||||
self.configured = False
|
||||
self.agent_connected = False
|
||||
self.config_sent = False
|
||||
self.config_retries = 0
|
||||
self.max_retries = 3
|
||||
self.stabilization_scheduled = False
|
||||
self.connection_retries = 0
|
||||
self.max_connection_retries = 30 # Try reconnecting for up to 5 minutes
|
||||
self.agent_check_count = 0
|
||||
self.configure_count = 0 # Track how many times we've configured (for reboots)
|
||||
|
||||
def on_channel_new(self, session, channel):
|
||||
"""Handle new channel creation"""
|
||||
@@ -36,44 +74,66 @@ class SpiceDisplayEnabler:
|
||||
channel_id = channel.get_property('channel-id')
|
||||
|
||||
if channel_type == CHANNEL_MAIN:
|
||||
log(f"Main channel received (id={channel_id})")
|
||||
self.main_channel = channel
|
||||
channel.connect_after('channel-event', self.on_channel_event)
|
||||
# Check agent status periodically
|
||||
GLib.timeout_add(500, self.check_agent_and_configure)
|
||||
elif channel_type == CHANNEL_DISPLAY:
|
||||
log(f"Display channel received (id={channel_id})")
|
||||
self.display_channels.append((channel_id, channel))
|
||||
|
||||
def on_channel_event(self, channel, event):
|
||||
"""Handle channel events"""
|
||||
log(f"Channel event: {event}")
|
||||
if event == SpiceClientGLib.ChannelEvent.OPENED:
|
||||
# Start checking for agent
|
||||
GLib.timeout_add(100, self.check_agent_and_configure)
|
||||
|
||||
def check_agent_and_configure(self):
|
||||
"""Check if agent is connected and configure if ready"""
|
||||
if self.config_sent:
|
||||
return False # Stop checking
|
||||
if self.stabilization_scheduled:
|
||||
return True # Keep checking but don't act yet
|
||||
|
||||
if not self.main_channel:
|
||||
return True # Keep checking
|
||||
|
||||
was_connected = self.agent_connected
|
||||
self.agent_connected = self.main_channel.get_property('agent-connected')
|
||||
self.agent_check_count += 1
|
||||
|
||||
if self.agent_connected and not self.config_sent:
|
||||
print(f"Agent connected! Configuring {self.num_displays} displays...")
|
||||
self.configure_monitors()
|
||||
return False # Stop checking
|
||||
# Detect agent disconnect (VM reboot)
|
||||
if was_connected and not self.agent_connected:
|
||||
log(f"Agent disconnected (VM may be rebooting)...")
|
||||
self.configured = False
|
||||
self.config_sent = False
|
||||
self.config_retries = 0
|
||||
|
||||
return True # Keep checking
|
||||
# Log every 10 checks (5 seconds)
|
||||
if self.agent_check_count % 10 == 0:
|
||||
status = "connected" if self.agent_connected else "waiting"
|
||||
log(f"Agent {status} (check #{self.agent_check_count}, configured={self.configure_count}x)")
|
||||
|
||||
if self.agent_connected and not self.config_sent and not self.stabilization_scheduled:
|
||||
log(f"Agent connected! Waiting 2s for stabilization...")
|
||||
self.stabilization_scheduled = True
|
||||
# Wait 2 seconds for agent to fully initialize before configuring
|
||||
GLib.timeout_add(2000, self.configure_monitors)
|
||||
|
||||
return True # Always keep checking for reboots
|
||||
|
||||
def configure_monitors(self):
|
||||
"""Configure multiple monitors via SPICE protocol"""
|
||||
if self.config_sent:
|
||||
return
|
||||
if self.configured:
|
||||
return False # Already done
|
||||
|
||||
if not self.main_channel:
|
||||
print("No main channel!")
|
||||
return
|
||||
log("No main channel!")
|
||||
return False
|
||||
|
||||
self.config_retries += 1
|
||||
attempt_str = f" (attempt {self.config_retries}/{self.max_retries})" if self.config_retries > 1 else ""
|
||||
log(f"Configuring {self.num_displays} displays{attempt_str}...")
|
||||
|
||||
# Enable and configure each display
|
||||
for i in range(self.num_displays):
|
||||
@@ -84,19 +144,64 @@ class SpiceDisplayEnabler:
|
||||
self.main_channel.update_display_enabled(i, True, False)
|
||||
self.main_channel.update_display(i, x, y, self.width, self.height, False)
|
||||
except Exception as e:
|
||||
print(f" Error setting display {i}: {e}")
|
||||
log(f" Error setting display {i}: {e}")
|
||||
|
||||
# Send the configuration
|
||||
try:
|
||||
self.main_channel.send_monitor_config()
|
||||
self.config_sent = True
|
||||
self.configured = True
|
||||
print(f"Configured {self.num_displays} displays at {self.width}x{self.height}")
|
||||
log(f"Sent config for {self.num_displays} displays at {self.width}x{self.height}")
|
||||
except Exception as e:
|
||||
print(f"Error sending config: {e}")
|
||||
log(f"Error sending config: {e}")
|
||||
|
||||
# Quit after a short delay
|
||||
GLib.timeout_add(1000, self.quit)
|
||||
# Schedule verification/retry after 3 seconds
|
||||
GLib.timeout_add(3000, self.verify_and_retry)
|
||||
return False # Don't repeat this timeout
|
||||
|
||||
def verify_and_retry(self):
|
||||
"""Verify configuration was applied, retry if needed"""
|
||||
if self.configured:
|
||||
return False # Already done
|
||||
|
||||
# Check if displays are actually enabled by re-checking agent state
|
||||
if not self.main_channel:
|
||||
log("Lost main channel during verification")
|
||||
self.quit()
|
||||
return False
|
||||
|
||||
# The SPICE protocol doesn't provide a direct way to verify display config
|
||||
# was applied. We assume success if we sent config and agent is still connected.
|
||||
agent_still_connected = self.main_channel.get_property('agent-connected')
|
||||
|
||||
if agent_still_connected and self.config_sent:
|
||||
# Mark as configured and send again for good measure
|
||||
if self.config_retries < self.max_retries:
|
||||
log(f"Sending config again to ensure it takes effect...")
|
||||
self.config_sent = False # Allow retry
|
||||
self.configure_monitors()
|
||||
else:
|
||||
# We've tried enough, assume success
|
||||
self.configured = True
|
||||
self.configure_count += 1
|
||||
self.stabilization_scheduled = False # Allow reconfiguration after reboot
|
||||
log(f"Configuration complete (configured {self.configure_count}x total)")
|
||||
# Don't quit - keep running to handle VM reboots
|
||||
elif not agent_still_connected:
|
||||
log("Agent disconnected during verification - will retry when reconnected")
|
||||
self.config_sent = False
|
||||
self.config_retries = 0
|
||||
self.stabilization_scheduled = False
|
||||
# Don't quit - agent will reconnect after reboot
|
||||
else:
|
||||
# Config not sent but agent connected - try again
|
||||
if self.config_retries < self.max_retries:
|
||||
log(f"Config not sent, retrying...")
|
||||
self.configure_monitors()
|
||||
else:
|
||||
log(f"Failed after {self.config_retries} attempts")
|
||||
self.quit()
|
||||
|
||||
return False # Don't repeat this timeout
|
||||
|
||||
def quit(self):
|
||||
self.loop.quit()
|
||||
@@ -105,31 +210,87 @@ class SpiceDisplayEnabler:
|
||||
def on_timeout(self):
|
||||
"""Handle overall timeout"""
|
||||
if not self.configured:
|
||||
print(f"Timeout after {self.timeout}s - agent not connected")
|
||||
log(f"Timeout after {self.timeout}s - agent not connected (checks={self.agent_check_count})")
|
||||
self.quit()
|
||||
return False
|
||||
|
||||
def check_connection_health(self):
|
||||
"""Check if connection is healthy, reconnect if needed"""
|
||||
log(f"Health check: configured={self.configure_count}x, main_channel={self.main_channel is not None}, agent={self.agent_connected}")
|
||||
|
||||
# Don't stop checking - we need to handle reboots
|
||||
if self.stabilization_scheduled:
|
||||
return True # Keep checking but don't reconnect during stabilization
|
||||
|
||||
# If we don't have a main channel after 10 seconds, reconnect
|
||||
if not self.main_channel:
|
||||
self.connection_retries += 1
|
||||
if self.connection_retries > self.max_connection_retries:
|
||||
log(f"Giving up after {self.connection_retries} connection attempts")
|
||||
return False
|
||||
|
||||
log(f"No main channel received, reconnecting (attempt {self.connection_retries})...")
|
||||
self.reconnect()
|
||||
return True # Keep checking
|
||||
|
||||
return True # Keep checking connection health
|
||||
|
||||
def reconnect(self):
|
||||
"""Disconnect and reconnect to SPICE"""
|
||||
if self.session:
|
||||
try:
|
||||
self.session.disconnect()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Reset state for new connection
|
||||
self.main_channel = None
|
||||
self.display_channels = []
|
||||
|
||||
# Create new session
|
||||
self.session = SpiceClientGLib.Session()
|
||||
self.session.set_property('uri', self.uri)
|
||||
self.session.connect_after('channel-new', self.on_channel_new)
|
||||
|
||||
if not self.session.connect():
|
||||
log(" Reconnection failed, will retry...")
|
||||
|
||||
def run(self):
|
||||
print(f"Connecting to {self.uri}...")
|
||||
print(f"Waiting up to {self.timeout}s for agent...")
|
||||
log(f"Connecting to {self.uri}...")
|
||||
log(f"Waiting up to {self.timeout}s for agent...")
|
||||
|
||||
# Wait for SPICE port to be available before connecting
|
||||
host, port = parse_spice_uri(self.uri)
|
||||
log(f"Waiting for SPICE server at {host}:{port}...")
|
||||
if not wait_for_port(host, port, timeout=60):
|
||||
log(f"SPICE server not available after 60s")
|
||||
return False
|
||||
log(f"SPICE port {port} is open, connecting...")
|
||||
|
||||
# Give SPICE server a moment to fully initialize after port opens
|
||||
time.sleep(1)
|
||||
|
||||
self.session = SpiceClientGLib.Session()
|
||||
self.session.set_property('uri', self.uri)
|
||||
self.session.connect_after('channel-new', self.on_channel_new)
|
||||
|
||||
if not self.session.connect():
|
||||
print("Failed to connect to SPICE server")
|
||||
return False
|
||||
log("Initial connection failed, will retry...")
|
||||
|
||||
# Check connection health every 10 seconds
|
||||
GLib.timeout_add(10000, self.check_connection_health)
|
||||
|
||||
# Set overall timeout
|
||||
GLib.timeout_add(self.timeout * 1000, self.on_timeout)
|
||||
|
||||
log("Entering main loop...")
|
||||
self.loop.run()
|
||||
log("Main loop exited")
|
||||
|
||||
if self.configured:
|
||||
print(f"Success: {self.num_displays} displays enabled")
|
||||
log(f"Success: {self.num_displays} displays enabled")
|
||||
else:
|
||||
print("Failed: Could not enable displays")
|
||||
log("Failed: Could not enable displays")
|
||||
|
||||
return self.configured
|
||||
|
||||
|
||||
Reference in New Issue
Block a user