runtime-rs: enhance debug info for agent connect.

We need friendlier logs for debugging agent connection
cases when kata pods fail.

Fixes #10057

Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
This commit is contained in:
Alex Lyn 2024-07-23 16:06:16 +08:00
parent d69950e5c6
commit 36385a114d
3 changed files with 17 additions and 4 deletions

View File

@ -113,7 +113,13 @@ impl KataAgent {
sock::new(&inner.socket_address, inner.config.server_port).context("new sock")?; sock::new(&inner.socket_address, inner.config.server_port).context("new sock")?;
let stream = sock.connect(&config).await.context("connect")?; let stream = sock.connect(&config).await.context("connect")?;
let fd = stream.into_raw_fd(); let fd = stream.into_raw_fd();
info!(sl!(), "get stream raw fd {:?}", fd); info!(
sl!(),
"get stream raw fd {:?} with socket address: {:?} and server_port {:?}",
fd,
&inner.socket_address,
inner.config.server_port
);
let c = Client::new(fd); let c = Client::new(fd);
inner.client = Some(c); inner.client = Some(c);
inner.client_fd = fd; inner.client_fd = fd;

View File

@ -69,11 +69,15 @@ impl Sock for Vsock {
); );
return Ok(Stream::Vsock(stream)); return Ok(Stream::Vsock(stream));
} }
Err(_) => { Err(e) => {
debug!(sl!(), "retry after {} ms: failed to connect to agent via vsock at {} attempts: {:?}", config.dial_timeout_ms, i, e);
tokio::time::sleep(Duration::from_millis(config.dial_timeout_ms)).await; tokio::time::sleep(Duration::from_millis(config.dial_timeout_ms)).await;
} }
} }
} }
Err(anyhow!("cannot connect to agent ttrpc server {:?}", config)) Err(anyhow!(
"cannot connect vsock to agent ttrpc server {:?}",
config
))
} }
} }

View File

@ -382,7 +382,10 @@ impl Sandbox for VirtSandbox {
.get_agent_socket() .get_agent_socket()
.await .await
.context("get agent socket")?; .context("get agent socket")?;
self.agent.start(&address).await.context("connect")?; self.agent
.start(&address)
.await
.context(format!("connect to address {:?}", &address))?;
self.resource_manager self.resource_manager
.setup_after_start_vm() .setup_after_start_vm()