From d04bb98e09f29063c2d05ec7245cbd910e10109e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 14 Apr 2026 18:14:10 +0200 Subject: [PATCH] runtime-rs: Increase reconnect_timeout_ms for confidential VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Go runtime's CoCo dev config uses dial_timeout = 45s, but all runtime-rs confidential VM configs had reconnect_timeout_ms set to 3000ms (3s) or 5000ms (SE). This is too short for confidential VMs, especially on arm64 where UEFI firmware (AAVMF) adds significant boot time on top of the measured boot process, causing ECONNRESET errors on the vsock connection before the agent is ready. Bump reconnect_timeout_ms to 45000ms across all confidential VM configs (coco-dev, SNP, TDX, SE) to match the Go runtime. Also raise dial_timeout_ms to 100ms in the same configs (from 10ms, or 90ms for SE), so the retry count (reconnect_timeout_ms / dial_timeout_ms) becomes 450. Signed-off-by: Fabiano Fidêncio Made-with: Cursor --- .../configuration-qemu-coco-dev-runtime-rs.toml.in | 10 +++++----- .../config/configuration-qemu-se-runtime-rs.toml.in | 10 +++++----- .../config/configuration-qemu-snp-runtime-rs.toml.in | 10 +++++----- .../config/configuration-qemu-tdx-runtime-rs.toml.in | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in index 2734d83cbb..1c37e39b64 100644 --- a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in @@ -544,17 +544,17 @@ kernel_modules = [] debug_console_enabled = false # Agent dial timeout in millisecond. -# (default: 10) -dial_timeout_ms = 10 +# (default: 100) +dial_timeout_ms = 100 # Agent reconnect timeout in millisecond. 
-# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300) +# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 450) # If you find pod cannot connect to the agent when starting, please # consider increasing this value to increase the retry times. # You'd better not change the value of dial_timeout_ms, unless you have an # idea of what you are doing. -# (default: 3000) -reconnect_timeout_ms = 3000 +# (default: 45000) +reconnect_timeout_ms = 45000 # Timeout in seconds for guest components (attestation-agent, confidential-data-hub) # to create their Unix sockets after being spawned by the agent. diff --git a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in index 93dbdf7846..99a6e1450f 100644 --- a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in @@ -521,17 +521,17 @@ kernel_modules = [] debug_console_enabled = false # Agent dial timeout in millisecond. -# (default: 10) -dial_timeout_ms = 90 +# (default: 100) +dial_timeout_ms = 100 # Agent reconnect timeout in millisecond. -# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300) +# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 450) # If you find pod cannot connect to the agent when starting, please # consider increasing this value to increase the retry times. # You'd better not change the value of dial_timeout_ms, unless you have an # idea of what you are doing. -# (default: 3000) -reconnect_timeout_ms = 5000 +# (default: 45000) +reconnect_timeout_ms = 45000 # Timeout in seconds for guest components (attestation-agent, confidential-data-hub) # to create their Unix sockets after being spawned by the agent. 
diff --git a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in index c76ca186db..b3905c29b9 100644 --- a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in @@ -563,17 +563,17 @@ kernel_modules = [] debug_console_enabled = false # Agent dial timeout in millisecond. -# (default: 10) -dial_timeout_ms = 10 +# (default: 100) +dial_timeout_ms = 100 # Agent reconnect timeout in millisecond. -# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300) +# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 450) # If you find pod cannot connect to the agent when starting, please # consider increasing this value to increase the retry times. # You'd better not change the value of dial_timeout_ms, unless you have an # idea of what you are doing. -# (default: 3000) -reconnect_timeout_ms = 3000 +# (default: 45000) +reconnect_timeout_ms = 45000 # Timeout in seconds for guest components (attestation-agent, confidential-data-hub) # to create their Unix sockets after being spawned by the agent. diff --git a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in index 2767324eaa..51bc649ead 100644 --- a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in @@ -539,17 +539,17 @@ kernel_modules = [] debug_console_enabled = false # Agent dial timeout in millisecond. -# (default: 10) -dial_timeout_ms = 10 +# (default: 100) +dial_timeout_ms = 100 # Agent reconnect timeout in millisecond. -# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 300) +# Retry times = reconnect_timeout_ms / dial_timeout_ms (default: 450) # If you find pod cannot connect to the agent when starting, please # consider increasing this value to increase the retry times. 
# You'd better not change the value of dial_timeout_ms, unless you have an # idea of what you are doing. -# (default: 3000) -reconnect_timeout_ms = 3000 +# (default: 45000) +reconnect_timeout_ms = 45000 # Timeout in seconds for guest components (attestation-agent, confidential-data-hub) # to create their Unix sockets after being spawned by the agent.