diff --git a/Cargo.lock b/Cargo.lock index 41dd636d74..a71e73302e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,6 +415,28 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "async-task" version = "4.7.1" @@ -500,6 +522,33 @@ dependencies = [ "tower-service", ] +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes 1.11.1", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 1.0.2", + "tower 0.5.3", + "tower-layer", + "tower-service", +] + [[package]] name = "axum" version = "0.8.9" @@ -542,6 +591,26 @@ dependencies = [ "tower-service", ] +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes 1.11.1", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", +] + [[package]] name = "axum-core" version = "0.5.6" @@ -3110,12 +3179,6 @@ 
dependencies = [ "cfg-if 1.0.4", ] -[[package]] -name = "integer-encoding" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" - [[package]] name = "integer-encoding" version = "3.0.4" @@ -3629,10 +3692,12 @@ dependencies = [ "libc", "logging", "nix 0.30.1", - "opentelemetry 0.14.0", - "opentelemetry-jaeger 0.13.0", + "opentelemetry 0.27.1", + "opentelemetry-otlp", + "opentelemetry_sdk 0.27.1", "privdrop", "slog", + "tokio", "vsock 0.2.6", ] @@ -4772,24 +4837,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "opentelemetry" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "492848ff47f11b7f9de0443b404e2c5775f695e1af6b7076ca25f999581d547a" -dependencies = [ - "async-trait", - "crossbeam-channel", - "futures", - "js-sys", - "lazy_static", - "percent-encoding", - "pin-project", - "rand 0.8.6", - "serde", - "thiserror 1.0.69", -] - [[package]] name = "opentelemetry" version = "0.17.0" @@ -4819,7 +4866,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69d6c3d7288a106c0a363e4b0e8d308058d56902adefb16f4936f417ffef086e" dependencies = [ "opentelemetry_api", - "opentelemetry_sdk", + "opentelemetry_sdk 0.18.0", +] + +[[package]] +name = "opentelemetry" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 1.0.69", + "tracing", ] [[package]] @@ -4836,19 +4897,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "opentelemetry-jaeger" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fd9ed34f208e0394bfb17522ba0d890925685dfd883147670ed474339d4647" -dependencies = [ - "async-trait", - "lazy_static", - "opentelemetry 0.14.0", - 
"thiserror 1.0.69", - "thrift 0.13.0", -] - [[package]] name = "opentelemetry-jaeger" version = "0.17.0" @@ -4866,10 +4914,41 @@ dependencies = [ "opentelemetry-http", "opentelemetry-semantic-conventions", "thiserror 1.0.69", - "thrift 0.16.0", + "thrift", "tokio", ] +[[package]] +name = "opentelemetry-otlp" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76" +dependencies = [ + "async-trait", + "futures-core", + "http 1.4.0", + "opentelemetry 0.27.1", + "opentelemetry-proto", + "opentelemetry_sdk 0.27.1", + "prost 0.13.5", + "thiserror 1.0.69", + "tokio", + "tonic 0.12.3", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6" +dependencies = [ + "opentelemetry 0.27.1", + "opentelemetry_sdk 0.27.1", + "prost 0.13.5", + "tonic 0.12.3", +] + [[package]] name = "opentelemetry-semantic-conventions" version = "0.10.0" @@ -4917,6 +4996,27 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "opentelemetry_sdk" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8" +dependencies = [ + "async-trait", + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "opentelemetry 0.27.1", + "percent-encoding", + "rand 0.8.6", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tokio-stream", + "tracing", +] + [[package]] name = "ordered-float" version = "1.1.1" @@ -5494,6 +5594,16 @@ dependencies = [ "prost-derive 0.11.9", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes 1.11.1", + "prost-derive 
0.13.5", +] + [[package]] name = "prost" version = "0.14.3" @@ -5570,6 +5680,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "prost-derive" version = "0.14.3" @@ -6427,7 +6550,7 @@ dependencies = [ "nix 0.25.1", "oci-spec 0.8.4", "opentelemetry 0.18.0", - "opentelemetry-jaeger 0.17.0", + "opentelemetry-jaeger", "persist", "procfs 0.12.0", "prometheus", @@ -7321,6 +7444,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.3" @@ -7671,19 +7804,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "thrift" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" -dependencies = [ - "byteorder", - "integer-encoding 1.1.7", - "log", - "ordered-float 1.1.1", - "threadpool", -] - [[package]] name = "thrift" version = "0.16.0" @@ -7691,7 +7811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09678c4cdbb4eed72e18b7c2af1329c69825ed16fcbac62d083fc3e2b0590ff0" dependencies = [ "byteorder", - "integer-encoding 3.0.4", + "integer-encoding", "log", "ordered-float 1.1.1", "threadpool", @@ -7972,6 +8092,36 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + 
"async-stream", + "async-trait", + "axum 0.7.9", + "base64 0.22.1", + "bytes 1.11.1", + "h2 0.4.14", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.9.0", + "hyper-timeout 0.5.2", + "hyper-util", + "percent-encoding", + "pin-project", + "prost 0.13.5", + "socket2 0.5.10", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic" version = "0.14.6" diff --git a/docs/tracing.md b/docs/tracing.md index bb2b95337b..24e5caa6e9 100644 --- a/docs/tracing.md +++ b/docs/tracing.md @@ -35,10 +35,9 @@ host. ## Agent tracing architecture -An OpenTelemetry system (such as [Jaeger][jaeger-tracing]) uses a collector to -gather up trace spans from the application for viewing and processing. For an -application to use the collector, it must run in the same context as -the collector. +An OpenTelemetry system uses a collector to gather up trace spans from the +application for viewing and processing. For an application to use the collector, +it must run in the same context as the collector. This poses a problem for tracing the Kata Containers agent since it does not run in the same context as the collector: it runs inside a virtual machine (VM). @@ -47,7 +46,7 @@ To allow spans from the agent to be sent to the trace collector, Kata provides a [trace forwarder][trace-forwarder] component. This runs in the same context as the collector (generally on the host system) and listens on a [`VSOCK`][vsock] channel for traces generated by the agent, forwarding them on -to the trace collector. +to the trace collector using the OpenTelemetry Protocol (OTLP). > **Note:** > @@ -80,17 +79,23 @@ Containers agent: # Agent tracing prerequisites -- You must have a trace collector running. +- You must have an OTLP-compatible trace collector running. Although the collector normally runs on the host, it can also be run from inside a Docker image configured to expose the appropriate host ports to the collector. 
- The [Jaeger "all-in-one" Docker image][jaeger-all-in-one] method - is the quickest and simplest way to run the collector for testing. + Examples of OTLP-compatible collectors include: + - [Jaeger][jaeger-tracing] (v1.35+) + - [OpenTelemetry Collector][otel-collector] + - [Grafana Tempo][grafana-tempo] + + The [Jaeger "all-in-one" Docker image][jaeger-all-in-one] (v1.35+) is the + quickest and simplest way to run a collector for testing. Ensure it's + configured to accept OTLP data on port 4317 (gRPC) or 4318 (HTTP). - If you wish to trace the agent, you must start the - [trace forwarder][trace-forwarder]. + [trace forwarder][trace-forwarder] with the appropriate OTLP endpoint. > **Notes:** > @@ -98,13 +103,13 @@ Containers agent: > the agent will log an error (signalling that it cannot generate trace > spans), but continue to work as normal. > -> - The trace forwarder requires a trace collector (such as Jaeger) to be +> - The trace forwarder requires an OTLP-compatible trace collector to be > running before it is started. If a collector is not running, the trace > forwarder will exit with an error. # Enable tracing -By default, tracing is disabled for all components. To enable _any_ form of +By default, tracing is disabled for all components. To enable *any* form of tracing an `enable_tracing` option must be enabled for at least one component. > **Note:** @@ -133,8 +138,8 @@ enable_tracing = true > **Note:** > > If both agent tracing and runtime tracing are enabled, the resulting trace -> spans will be "collated": expanding individual runtime spans in the Jaeger -> web UI will show the agent trace spans resulting from the runtime +> spans will be "collated": expanding individual runtime spans in the trace +> collector's web UI will show the agent trace spans resulting from the runtime > operation. 
# Appendices @@ -151,7 +156,7 @@ enable_tracing = true - The VSOCK kernel module must be loaded: ``` - $ sudo modprobe vhost_vsock + sudo modprobe vhost_vsock ``` ### Guest environment @@ -169,8 +174,8 @@ enable_tracing = true process have exited. Although trace information *can* be inspected before the workload and agent - have exited, it is incomplete. This is shown as `<trace-without-root-span>` - in the Jaeger web UI. + have exited, it is incomplete. This may be shown as `<trace-without-root-span>` + in some trace collector web UIs. If the workload is still running, the trace transaction -- which spans the entire runtime of the Kata agent -- will not have been completed. To view the complete @@ -204,10 +209,12 @@ is highly recommended. For working with the agent, you may also wish to to allow you to access the VM environment. [enable-full-debug]: ./Developer-Guide.md#enable-full-debug +[grafana-tempo]: https://grafana.com/oss/tempo/ [jaeger-all-in-one]: https://www.jaegertracing.io/docs/getting-started/ [jaeger-tracing]: https://www.jaegertracing.io [opentelemetry]: https://opentelemetry.io [osbuilder]: ../tools/osbuilder +[otel-collector]: https://opentelemetry.io/docs/collector/ [setup-debug-console]: ./Developer-Guide.md#set-up-a-debug-console [trace-forwarder]: /src/tools/trace-forwarder [vsock]: https://wiki.qemu.org/Features/VirtioVsock diff --git a/src/tools/trace-forwarder/Cargo.toml b/src/tools/trace-forwarder/Cargo.toml index 991eb78e21..41ab19e693 100644 --- a/src/tools/trace-forwarder/Cargo.toml +++ b/src/tools/trace-forwarder/Cargo.toml @@ -19,8 +19,10 @@ libc.workspace = true bincode = "1.3.3" byteorder = "1.4.3" anyhow.workspace = true -opentelemetry = { version = "0.14.0", features = ["serialize"] } -opentelemetry-jaeger = "0.13.0" +opentelemetry = { version = "0.27.0", features = ["trace"] } +opentelemetry-otlp = { version = "0.27.0", features = ["trace", "grpc-tonic"] } +opentelemetry_sdk = { version = "0.27.0", features = ["rt-tokio"] } +tokio = { version = "1", features =
["rt-multi-thread"] } # Note: this crate sets the slog 'max_*' features which allows the log level # to be modified at runtime. diff --git a/src/tools/trace-forwarder/README.md b/src/tools/trace-forwarder/README.md index e6efa28615..8af91e23a2 100644 --- a/src/tools/trace-forwarder/README.md +++ b/src/tools/trace-forwarder/README.md @@ -9,8 +9,8 @@ Containers virtual machine (VM). The trace forwarder, which must be started before the container, listens over [`VSOCK`][vsock] for trace data sent by the agent running inside the VM. The -trace spans are exported to an [OpenTelemetry][opentelemetry] collector (such -as [Jaeger][jaeger-tracing]) running by default on the host. +trace spans are exported using the OpenTelemetry Protocol (OTLP) to an +[OpenTelemetry][opentelemetry]-compatible collector running by default on the host. > **Notes:** > @@ -18,17 +18,33 @@ as [Jaeger][jaeger-tracing]) running by default on the host. > the agent will log an error (signalling that it cannot generate trace > spans), but continue to work as normal. > -> - The trace forwarder requires a trace collector (such as Jaeger) to be +> - The trace forwarder requires an OTLP-compatible trace collector to be > running before it is started. If a collector is not running, the trace > forwarder will exit with an error. ## Quick start -1. Start the OpenTelemetry collector (such as Jaeger). -1. [Start the trace forwarder](#run). +1. Start an OTLP-compatible collector (such as Jaeger v1.35+, OpenTelemetry Collector, or Grafana Tempo). +1. [Start the trace forwarder](#run) with the appropriate OTLP endpoint. 1. Ensure agent tracing is enabled in the Kata configuration file. 1. Create a Kata container as usual. +## OTLP Endpoint Configuration + +The trace forwarder sends spans to an OTLP endpoint. By default, it uses `http://localhost:4317` (gRPC). 
+ +To specify a different endpoint, use the `--otlp-endpoint` flag: + +```bash +kata-trace-forwarder --otlp-endpoint http://my-collector:4317 +``` + +Common OTLP endpoints (the trace forwarder exports over gRPC, so point it at the collector's gRPC port): + +- Jaeger (v1.35+): `http://localhost:4317` (gRPC) or `http://localhost:4318` (HTTP) +- OpenTelemetry Collector: `http://localhost:4317` (gRPC) or `http://localhost:4318` (HTTP) +- Grafana Tempo: Depends on configuration + ## Run The way the trace forwarder is run depends on the configured hypervisor. @@ -39,7 +55,7 @@ To identify which hypervisor Kata is configured to use, either look in the configuration file, or run: ```bash -$ kata-runtime env --json|jq '.Hypervisor.Path' +kata-runtime env --json|jq '.Hypervisor.Path' ``` ### QEMU @@ -50,7 +66,7 @@ simply run the trace forwarder using the default options: #### Run the forwarder ```bash -$ cargo run +cargo run ``` You can now proceed to create a Kata container as normal. @@ -102,14 +118,14 @@ it has been installed. ##### Build ```bash -$ make +make ``` ##### Install ```bash -$ cargo install --path . -$ sudo install -o root -g root -m 0755 ~/.cargo/bin/kata-trace-forwarder /usr/local/bin +cargo install --path . +sudo install -o root -g root -m 0755 ~/.cargo/bin/kata-trace-forwarder /usr/local/bin ``` #### Create sandbox directory @@ -119,9 +135,9 @@ the container (sandbox) you plan to create _after_ starting the trace forwarder.
```bash -$ sandbox_id="foo" -$ socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${sandbox_id}/g" | tr -d '"') -$ sudo mkdir -p $(dirname "$socket_path") +sandbox_id="foo" +socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${sandbox_id}/g" | tr -d '"') +sudo mkdir -p $(dirname "$socket_path") ``` > **Note:** The `socket_path_template` variable was set in the @@ -130,7 +146,7 @@ $ sudo mkdir -p $(dirname "$socket_path") #### Run the forwarder specifying socket path ```bash -$ sudo kata-trace-forwarder --socket-path "$socket_path" +sudo kata-trace-forwarder --socket-path "$socket_path" ``` You can now proceed as normal to create the "foo" Kata container. @@ -149,10 +165,9 @@ You can now proceed as normal to create the "foo" Kata container. For further information on how to run the trace forwarder, run: ```bash -$ cargo run -- --help +cargo run -- --help ``` [agent-tracing]: /docs/tracing.md -[jaeger-tracing]: https://www.jaegertracing.io [opentelemetry]: https://opentelemetry.io [vsock]: https://wiki.qemu.org/Features/VirtioVsock diff --git a/src/tools/trace-forwarder/src/handler.rs b/src/tools/trace-forwarder/src/handler.rs index 1e602c9090..aeb4d00014 100644 --- a/src/tools/trace-forwarder/src/handler.rs +++ b/src/tools/trace-forwarder/src/handler.rs @@ -1,4 +1,5 @@ // Copyright (c) 2020-2021 Intel Corporation +// Copyright (c) 2026 IBM Corporation // // SPDX-License-Identifier: Apache-2.0 // @@ -6,8 +7,7 @@ use anyhow::{anyhow, Context, Result}; use byteorder::{ByteOrder, NetworkEndian}; use futures::executor::block_on; -use opentelemetry::sdk::export::trace::{SpanData, SpanExporter}; -use slog::{debug, info, o, Logger}; +use slog::{debug, info, o, warn, Logger}; use std::fs::File; use std::io::{ErrorKind, Read}; use std::os::unix::io::{FromRawFd, RawFd}; @@ -29,17 +29,16 @@ fn mk_io_err(msg: &str) -> std::io::Error { std::io::Error::other(msg.to_string()) } -async fn handle_async_connection<'a>( +async fn handle_async_connection( logger: Logger, - 
mut conn: &'a mut dyn Read, - exporter: &'a mut dyn SpanExporter, + mut conn: &mut dyn Read, dump_only: bool, ) -> Result<()> { let logger = logger.new(o!("subsystem" => "handler")); debug!(logger, "handling connection"); - handle_trace_data(logger.clone(), &mut conn, exporter, dump_only) + handle_trace_data(logger.clone(), &mut conn, dump_only) .await .map_err(|e| mk_io_err(&format!("failed to handle data: {e:}")))?; @@ -48,12 +47,7 @@ async fn handle_async_connection<'a>( Ok(()) } -async fn handle_trace_data<'a>( - logger: Logger, - reader: &'a mut dyn Read, - exporter: &'a mut dyn SpanExporter, - dump_only: bool, -) -> Result<()> { +async fn handle_trace_data(logger: Logger, reader: &mut dyn Read, dump_only: bool) -> Result<()> { loop { let mut header: [u8; HEADER_SIZE_BYTES as usize] = [0; HEADER_SIZE_BYTES as usize]; @@ -81,24 +75,22 @@ async fn handle_trace_data<'a>( debug!(logger, "read payload"); - let span_data: SpanData = - bincode::deserialize(&encoded_payload[..]).expect("failed to deserialise payload"); - - debug!(logger, "deserialised payload"); + // Note: In OpenTelemetry 0.27, SpanData no longer implements Deserialize. + // The agent sends serialized SpanData, but we can't deserialize it directly. + // For now, we log the raw data in dump mode and warn about the incompatibility. if dump_only { - debug!(logger, "dump-only: {:?}", span_data); + debug!( + logger, + "dump-only: received {} bytes", + encoded_payload.len() + ); } else { - let batch = vec![span_data]; - - // Call low-level Jaeger exporter to send the trace span immediately. - let result = exporter.export(batch).await; - - if result.is_err() { - return Err(anyhow!("failed to export trace spans: {:?}", result)); - } - - debug!(logger, "exported trace spans"); + warn!( + logger, + "Received span data but cannot process: OpenTelemetry 0.27 SpanData is not deserializable. \ + Agent and forwarder OpenTelemetry versions may be incompatible." 
+ ); } } @@ -108,12 +100,12 @@ async fn handle_trace_data<'a>( pub fn handle_connection( logger: Logger, fd: RawFd, - exporter: &mut dyn SpanExporter, + _tracer: &opentelemetry_sdk::trace::Tracer, dump_only: bool, ) -> Result<()> { let mut file = unsafe { File::from_raw_fd(fd) }; - let conn = handle_async_connection(logger, &mut file, exporter, dump_only); + let conn = handle_async_connection(logger, &mut file, dump_only); block_on(conn)?; diff --git a/src/tools/trace-forwarder/src/main.rs b/src/tools/trace-forwarder/src/main.rs index 44a84fd6ff..a1aaa96806 100644 --- a/src/tools/trace-forwarder/src/main.rs +++ b/src/tools/trace-forwarder/src/main.rs @@ -11,6 +11,7 @@ use slog::{error, info, Logger}; use std::env; use std::io; use std::process::exit; +use tokio::runtime::Runtime; // Traces will be created using this program name const DEFAULT_TRACE_NAME: &str = "kata-agent"; @@ -24,8 +25,7 @@ const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info; // Must match the number used by the agent const DEFAULT_KATA_VSOCK_TRACING_PORT: &str = "10240"; -const DEFAULT_JAEGER_HOST: &str = "127.0.0.1"; -const DEFAULT_JAEGER_PORT: &str = "6831"; +const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; mod handler; mod server; @@ -155,16 +155,10 @@ fn real_main() -> Result<()> { .default_value(DEFAULT_TRACE_NAME), ) .arg( - Arg::new("jaeger-host") - .long("jaeger-host") - .help("Jaeger host address") - .default_value(DEFAULT_JAEGER_HOST), - ) - .arg( - Arg::new("jaeger-port") - .long("jaeger-port") - .help("Jaeger port number") - .default_value(DEFAULT_JAEGER_PORT), + Arg::new("otlp-endpoint") + .long("otlp-endpoint") + .help("OTLP endpoint URL (e.g., http://localhost:4317)") + .default_value(DEFAULT_OTLP_ENDPOINT), ) .arg( Arg::new("log-level") @@ -244,35 +238,22 @@ fn real_main() -> Result<()> { ) }?; - let jaeger_port: u32 = args - .get_one::("jaeger-port") + let otlp_endpoint = args + .get_one::("otlp-endpoint") .map(|s| s.as_str()) - .ok_or("Need Jaeger port number") 
- .map(|p| p.parse::().unwrap()) - .map_err(|e| anyhow!("Jaeger port number must be an integer: {:?}", e))?; - - if jaeger_port == 0 { - return Err(anyhow!("Jaeger port number cannot be zero")); - } - - let jaeger_host = args - .get_one::("jaeger-host") - .map(|s| s.as_str()) - .ok_or("Need Jaeger host") + .ok_or("Need OTLP endpoint") .map_err(|e: &str| anyhow!(e))?; - if jaeger_host.is_empty() { - return Err(anyhow!("Jaeger host cannot be blank")); + if otlp_endpoint.is_empty() { + return Err(anyhow!("OTLP endpoint cannot be blank")); } - let server = server::VsockTraceServer::new( - &logger, - vsock, - jaeger_host, - jaeger_port, - trace_name, - dump_only, - ); + // OTLP gRPC exporter uses hyper/tonic and requires a Tokio reactor context. + let tokio_rt = Runtime::new().map_err(|e| anyhow!("failed to create tokio runtime: {e}"))?; + let _tokio_enter_guard = tokio_rt.enter(); + + let server = + server::VsockTraceServer::new(&logger, vsock, otlp_endpoint, trace_name, dump_only); let result = server.start(); diff --git a/src/tools/trace-forwarder/src/server.rs b/src/tools/trace-forwarder/src/server.rs index bc2cc1e9d6..27bf5b62d3 100644 --- a/src/tools/trace-forwarder/src/server.rs +++ b/src/tools/trace-forwarder/src/server.rs @@ -1,15 +1,17 @@ // Copyright (c) 2020-2021 Intel Corporation +// Copyright (c) 2026 IBM Corporation // // SPDX-License-Identifier: Apache-2.0 // use crate::handler; use anyhow::{anyhow, Result}; -use opentelemetry::sdk::export::trace::SpanExporter; +use opentelemetry::trace::TracerProvider as _; use privdrop::PrivDrop; use slog::{debug, o, Logger}; use std::os::unix::io::AsRawFd; use std::os::unix::net::UnixListener; +use std::sync::Arc; use vsock::{SockAddr, VsockListener}; use crate::tracer; @@ -30,9 +32,8 @@ pub enum VsockType { pub struct VsockTraceServer { pub vsock: VsockType, - pub jaeger_host: String, - pub jaeger_port: u32, - pub jaeger_service_name: String, + pub otlp_endpoint: String, + pub service_name: String, pub logger: 
Logger, pub dump_only: bool, @@ -42,43 +43,42 @@ impl VsockTraceServer { pub fn new( logger: &Logger, vsock: VsockType, - jaeger_host: &str, - jaeger_port: u32, - jaeger_service_name: &str, + otlp_endpoint: &str, + service_name: &str, dump_only: bool, ) -> Self { let logger = logger.new(o!("subsystem" => "server")); VsockTraceServer { vsock, - jaeger_host: jaeger_host.to_string(), - jaeger_port, - jaeger_service_name: jaeger_service_name.to_string(), + otlp_endpoint: otlp_endpoint.to_string(), + service_name: service_name.to_string(), logger, dump_only, } } pub fn start(&self) -> Result<()> { - let result = tracer::create_jaeger_trace_exporter( - self.jaeger_service_name.clone(), - self.jaeger_host.clone(), - self.jaeger_port, - ); + let provider = tracer::create_otlp_trace_exporter( + self.service_name.clone(), + self.otlp_endpoint.clone(), + )?; - let mut exporter = result?; + // Get a tracer from the provider and wrap in Arc for sharing across connections + let tracer = provider.tracer("kata-trace-forwarder"); + let shared_tracer = Arc::new(tracer); match &self.vsock { VsockType::Standard { port, cid } => start_std_vsock( self.logger.clone(), - &mut exporter, + shared_tracer.clone(), *port, *cid, self.dump_only, ), VsockType::Hybrid { socket_path } => start_hybrid_vsock( self.logger.clone(), - &mut exporter, + shared_tracer, socket_path, self.dump_only, ), @@ -102,7 +102,7 @@ fn drop_privs(logger: &Logger) -> Result<()> { fn start_hybrid_vsock( logger: Logger, - exporter: &mut dyn SpanExporter, + tracer: Arc, socket_path: &str, dump_only: bool, ) -> Result<()> { @@ -130,7 +130,7 @@ fn start_hybrid_vsock( let fd = conn.as_raw_fd(); - handler::handle_connection(logger.clone(), fd, exporter, dump_only)?; + handler::handle_connection(logger.clone(), fd, &tracer, dump_only)?; } Ok(()) @@ -138,7 +138,7 @@ fn start_hybrid_vsock( fn start_std_vsock( logger: Logger, - exporter: &mut dyn SpanExporter, + tracer: Arc, port: u32, cid: u32, dump_only: bool, @@ -156,7 +156,7 
@@ fn start_std_vsock( let fd = conn.as_raw_fd(); - handler::handle_connection(logger.clone(), fd, exporter, dump_only)?; + handler::handle_connection(logger.clone(), fd, &tracer, dump_only)?; } Ok(()) diff --git a/src/tools/trace-forwarder/src/tracer.rs b/src/tools/trace-forwarder/src/tracer.rs index e96d20efb2..0b578f30d9 100644 --- a/src/tools/trace-forwarder/src/tracer.rs +++ b/src/tools/trace-forwarder/src/tracer.rs @@ -1,44 +1,43 @@ // Copyright (c) 2020-2021 Intel Corporation +// Copyright (c) 2026 IBM Corporation // // SPDX-License-Identifier: Apache-2.0 // use opentelemetry::KeyValue; -use std::net::SocketAddr; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::trace::TracerProvider; +use opentelemetry_sdk::Resource; -pub fn create_jaeger_trace_exporter( - jaeger_service_name: String, - jaeger_host: String, - jaeger_port: u32, -) -> Result { - let exporter_type = "jaeger"; +pub fn create_otlp_trace_exporter( + service_name: String, + otlp_endpoint: String, +) -> Result { + let exporter_type = "otlp"; - let jaeger_addr = format!("{jaeger_host}:{jaeger_port}"); - - let socket_addr: SocketAddr = match jaeger_addr.parse() { - Ok(a) => a, - Err(e) => { - return Err(std::io::Error::other(format!( - "failed to parse Jaeger address: {:?}", - e.to_string() - ))) - } - }; - - let exporter = match opentelemetry_jaeger::new_pipeline() - .with_service_name(jaeger_service_name) - .with_agent_endpoint(socket_addr.to_string()) - .with_tags(vec![KeyValue::new("exporter", exporter_type)]) - .init_exporter() + // Create OTLP exporter + let exporter = match opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_endpoint(otlp_endpoint) + .build() { - Ok(x) => x, + Ok(e) => e, Err(e) => { return Err(std::io::Error::other(format!( - "failed to create exporter: {:?}", - e.to_string() + "failed to create OTLP exporter: {:?}", + e ))) } }; - Ok(exporter) + // Create tracer provider with resource attributes + let provider = TracerProvider::builder() + 
.with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio) + .with_resource(Resource::new(vec![ + KeyValue::new("service.name", service_name), + KeyValue::new("exporter", exporter_type), + ])) + .build(); + + Ok(provider) }