mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 06:28:11 +00:00
Merge pull request #13036 from stevenhorsman/jaeger-to-otlp-tracing-switch
trace-forwarder: migrate from Jaeger to OTLP exporter
This commit is contained in:
262
Cargo.lock
generated
262
Cargo.lock
generated
@@ -415,6 +415,28 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
|
||||
dependencies = [
|
||||
"async-stream-impl",
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream-impl"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-task"
|
||||
version = "4.7.1"
|
||||
@@ -500,6 +522,33 @@ dependencies = [
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core 0.4.5",
|
||||
"bytes 1.11.1",
|
||||
"futures-util",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"itoa",
|
||||
"matchit 0.7.3",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"serde",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tower 0.5.3",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.8.9"
|
||||
@@ -542,6 +591,26 @@ dependencies = [
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes 1.11.1",
|
||||
"futures-util",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.5.6"
|
||||
@@ -3110,12 +3179,6 @@ dependencies = [
|
||||
"cfg-if 1.0.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "integer-encoding"
|
||||
version = "1.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f"
|
||||
|
||||
[[package]]
|
||||
name = "integer-encoding"
|
||||
version = "3.0.4"
|
||||
@@ -3629,10 +3692,12 @@ dependencies = [
|
||||
"libc",
|
||||
"logging",
|
||||
"nix 0.30.1",
|
||||
"opentelemetry 0.14.0",
|
||||
"opentelemetry-jaeger 0.13.0",
|
||||
"opentelemetry 0.27.1",
|
||||
"opentelemetry-otlp",
|
||||
"opentelemetry_sdk 0.27.1",
|
||||
"privdrop",
|
||||
"slog",
|
||||
"tokio",
|
||||
"vsock 0.2.6",
|
||||
]
|
||||
|
||||
@@ -4772,24 +4837,6 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "492848ff47f11b7f9de0443b404e2c5775f695e1af6b7076ca25f999581d547a"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"crossbeam-channel",
|
||||
"futures",
|
||||
"js-sys",
|
||||
"lazy_static",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"rand 0.8.6",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.17.0"
|
||||
@@ -4819,7 +4866,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69d6c3d7288a106c0a363e4b0e8d308058d56902adefb16f4936f417ffef086e"
|
||||
dependencies = [
|
||||
"opentelemetry_api",
|
||||
"opentelemetry_sdk",
|
||||
"opentelemetry_sdk 0.18.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"js-sys",
|
||||
"pin-project-lite",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4836,19 +4897,6 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-jaeger"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97fd9ed34f208e0394bfb17522ba0d890925685dfd883147670ed474339d4647"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"lazy_static",
|
||||
"opentelemetry 0.14.0",
|
||||
"thiserror 1.0.69",
|
||||
"thrift 0.13.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-jaeger"
|
||||
version = "0.17.0"
|
||||
@@ -4866,10 +4914,41 @@ dependencies = [
|
||||
"opentelemetry-http",
|
||||
"opentelemetry-semantic-conventions",
|
||||
"thiserror 1.0.69",
|
||||
"thrift 0.16.0",
|
||||
"thrift",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-otlp"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-core",
|
||||
"http 1.4.0",
|
||||
"opentelemetry 0.27.1",
|
||||
"opentelemetry-proto",
|
||||
"opentelemetry_sdk 0.27.1",
|
||||
"prost 0.13.5",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
|
||||
dependencies = [
|
||||
"opentelemetry 0.27.1",
|
||||
"opentelemetry_sdk 0.27.1",
|
||||
"prost 0.13.5",
|
||||
"tonic 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.10.0"
|
||||
@@ -4917,6 +4996,27 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry_sdk"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-channel",
|
||||
"futures-executor",
|
||||
"futures-util",
|
||||
"glob",
|
||||
"opentelemetry 0.27.1",
|
||||
"percent-encoding",
|
||||
"rand 0.8.6",
|
||||
"serde_json",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "1.1.1"
|
||||
@@ -5494,6 +5594,16 @@ dependencies = [
|
||||
"prost-derive 0.11.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
|
||||
dependencies = [
|
||||
"bytes 1.11.1",
|
||||
"prost-derive 0.13.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.14.3"
|
||||
@@ -5570,6 +5680,19 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.14.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.14.3"
|
||||
@@ -6427,7 +6550,7 @@ dependencies = [
|
||||
"nix 0.25.1",
|
||||
"oci-spec 0.8.4",
|
||||
"opentelemetry 0.18.0",
|
||||
"opentelemetry-jaeger 0.17.0",
|
||||
"opentelemetry-jaeger",
|
||||
"persist",
|
||||
"procfs 0.12.0",
|
||||
"prometheus",
|
||||
@@ -7321,6 +7444,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.6.3"
|
||||
@@ -7671,19 +7804,6 @@ dependencies = [
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thrift"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding 1.1.7",
|
||||
"log",
|
||||
"ordered-float 1.1.1",
|
||||
"threadpool",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thrift"
|
||||
version = "0.16.0"
|
||||
@@ -7691,7 +7811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09678c4cdbb4eed72e18b7c2af1329c69825ed16fcbac62d083fc3e2b0590ff0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding 3.0.4",
|
||||
"integer-encoding",
|
||||
"log",
|
||||
"ordered-float 1.1.1",
|
||||
"threadpool",
|
||||
@@ -7972,6 +8092,36 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum 0.7.9",
|
||||
"base64 0.22.1",
|
||||
"bytes 1.11.1",
|
||||
"h2 0.4.14",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"hyper 1.9.0",
|
||||
"hyper-timeout 0.5.2",
|
||||
"hyper-util",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost 0.13.5",
|
||||
"socket2 0.5.10",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tower 0.4.13",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.14.6"
|
||||
|
||||
@@ -35,10 +35,9 @@ host.
|
||||
|
||||
## Agent tracing architecture
|
||||
|
||||
An OpenTelemetry system (such as [Jaeger][jaeger-tracing]) uses a collector to
|
||||
gather up trace spans from the application for viewing and processing. For an
|
||||
application to use the collector, it must run in the same context as
|
||||
the collector.
|
||||
An OpenTelemetry system uses a collector to gather up trace spans from the
|
||||
application for viewing and processing. For an application to use the collector,
|
||||
it must run in the same context as the collector.
|
||||
|
||||
This poses a problem for tracing the Kata Containers agent since it does not
|
||||
run in the same context as the collector: it runs inside a virtual machine (VM).
|
||||
@@ -47,7 +46,7 @@ To allow spans from the agent to be sent to the trace collector, Kata provides
|
||||
a [trace forwarder][trace-forwarder] component. This runs in the same context
|
||||
as the collector (generally on the host system) and listens on a
|
||||
[`VSOCK`][vsock] channel for traces generated by the agent, forwarding them on
|
||||
to the trace collector.
|
||||
to the trace collector using the OpenTelemetry Protocol (OTLP).
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
@@ -80,17 +79,23 @@ Containers agent:
|
||||
|
||||
# Agent tracing prerequisites
|
||||
|
||||
- You must have a trace collector running.
|
||||
- You must have an OTLP-compatible trace collector running.
|
||||
|
||||
Although the collector normally runs on the host, it can also be run from
|
||||
inside a Docker image configured to expose the appropriate host ports to the
|
||||
collector.
|
||||
|
||||
The [Jaeger "all-in-one" Docker image][jaeger-all-in-one] method
|
||||
is the quickest and simplest way to run the collector for testing.
|
||||
Examples of OTLP-compatible collectors include:
|
||||
- [Jaeger][jaeger-tracing] (v1.35+)
|
||||
- [OpenTelemetry Collector][otel-collector]
|
||||
- [Grafana Tempo][grafana-tempo]
|
||||
|
||||
The [Jaeger "all-in-one" Docker image][jaeger-all-in-one] (v1.35+) is the
|
||||
quickest and simplest way to run a collector for testing. Ensure it's
|
||||
configured to accept OTLP data on port 4317 (gRPC) or 4318 (HTTP).
|
||||
|
||||
- If you wish to trace the agent, you must start the
|
||||
[trace forwarder][trace-forwarder].
|
||||
[trace forwarder][trace-forwarder] with the appropriate OTLP endpoint.
|
||||
|
||||
> **Notes:**
|
||||
>
|
||||
@@ -98,13 +103,13 @@ Containers agent:
|
||||
> the agent will log an error (signalling that it cannot generate trace
|
||||
> spans), but continue to work as normal.
|
||||
>
|
||||
> - The trace forwarder requires a trace collector (such as Jaeger) to be
|
||||
> - The trace forwarder requires an OTLP-compatible trace collector to be
|
||||
> running before it is started. If a collector is not running, the trace
|
||||
> forwarder will exit with an error.
|
||||
|
||||
# Enable tracing
|
||||
|
||||
By default, tracing is disabled for all components. To enable _any_ form of
|
||||
By default, tracing is disabled for all components. To enable *any* form of
|
||||
tracing, an `enable_tracing` option must be enabled for at least one component.
|
||||
|
||||
> **Note:**
|
||||
@@ -133,8 +138,8 @@ enable_tracing = true
|
||||
> **Note:**
|
||||
>
|
||||
> If both agent tracing and runtime tracing are enabled, the resulting trace
|
||||
> spans will be "collated": expanding individual runtime spans in the Jaeger
|
||||
> web UI will show the agent trace spans resulting from the runtime
|
||||
> spans will be "collated": expanding individual runtime spans in the trace
|
||||
> collector's web UI will show the agent trace spans resulting from the runtime
|
||||
> operation.
|
||||
|
||||
# Appendices
|
||||
@@ -151,7 +156,7 @@ enable_tracing = true
|
||||
- The VSOCK kernel module must be loaded:
|
||||
|
||||
```
|
||||
$ sudo modprobe vhost_vsock
|
||||
sudo modprobe vhost_vsock
|
||||
```
|
||||
|
||||
### Guest environment
|
||||
@@ -169,8 +174,8 @@ enable_tracing = true
|
||||
process have exited.
|
||||
|
||||
Although trace information *can* be inspected before the workload and agent
|
||||
have exited, it is incomplete. This is shown as `<trace-without-root-span>`
|
||||
in the Jaeger web UI.
|
||||
have exited, it is incomplete. This may be shown as `<trace-without-root-span>`
|
||||
in some trace collector web UIs.
|
||||
|
||||
If the workload is still running, the trace transaction -- which spans the entire
|
||||
runtime of the Kata agent -- will not have been completed. To view the complete
|
||||
@@ -204,10 +209,12 @@ is highly recommended. For working with the agent, you may also wish to
|
||||
to allow you to access the VM environment.
|
||||
|
||||
[enable-full-debug]: ./Developer-Guide.md#enable-full-debug
|
||||
[grafana-tempo]: https://grafana.com/oss/tempo/
|
||||
[jaeger-all-in-one]: https://www.jaegertracing.io/docs/getting-started/
|
||||
[jaeger-tracing]: https://www.jaegertracing.io
|
||||
[opentelemetry]: https://opentelemetry.io
|
||||
[osbuilder]: ../tools/osbuilder
|
||||
[otel-collector]: https://opentelemetry.io/docs/collector/
|
||||
[setup-debug-console]: ./Developer-Guide.md#set-up-a-debug-console
|
||||
[trace-forwarder]: /src/tools/trace-forwarder
|
||||
[vsock]: https://wiki.qemu.org/Features/VirtioVsock
|
||||
|
||||
@@ -19,8 +19,10 @@ libc.workspace = true
|
||||
bincode = "1.3.3"
|
||||
byteorder = "1.4.3"
|
||||
anyhow.workspace = true
|
||||
opentelemetry = { version = "0.14.0", features = ["serialize"] }
|
||||
opentelemetry-jaeger = "0.13.0"
|
||||
opentelemetry = { version = "0.27.0", features = ["trace"] }
|
||||
opentelemetry-otlp = { version = "0.27.0", features = ["trace", "grpc-tonic"] }
|
||||
opentelemetry_sdk = { version = "0.27.0", features = ["rt-tokio"] }
|
||||
tokio = { version = "1", features = ["rt-multi-thread"] }
|
||||
|
||||
# Note: this crate sets the slog 'max_*' features which allows the log level
|
||||
# to be modified at runtime.
|
||||
|
||||
@@ -9,8 +9,8 @@ Containers virtual machine (VM).
|
||||
|
||||
The trace forwarder, which must be started before the container, listens over
|
||||
[`VSOCK`][vsock] for trace data sent by the agent running inside the VM. The
|
||||
trace spans are exported to an [OpenTelemetry][opentelemetry] collector (such
|
||||
as [Jaeger][jaeger-tracing]) running by default on the host.
|
||||
trace spans are exported using the OpenTelemetry Protocol (OTLP) to an
|
||||
[OpenTelemetry][opentelemetry]-compatible collector running by default on the host.
|
||||
|
||||
> **Notes:**
|
||||
>
|
||||
@@ -18,17 +18,33 @@ as [Jaeger][jaeger-tracing]) running by default on the host.
|
||||
> the agent will log an error (signalling that it cannot generate trace
|
||||
> spans), but continue to work as normal.
|
||||
>
|
||||
> - The trace forwarder requires a trace collector (such as Jaeger) to be
|
||||
> - The trace forwarder requires an OTLP-compatible trace collector to be
|
||||
> running before it is started. If a collector is not running, the trace
|
||||
> forwarder will exit with an error.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Start the OpenTelemetry collector (such as Jaeger).
|
||||
1. [Start the trace forwarder](#run).
|
||||
1. Start an OTLP-compatible collector (such as Jaeger v1.35+, OpenTelemetry Collector, or Grafana Tempo).
|
||||
1. [Start the trace forwarder](#run) with the appropriate OTLP endpoint.
|
||||
1. Ensure agent tracing is enabled in the Kata configuration file.
|
||||
1. Create a Kata container as usual.
|
||||
|
||||
## OTLP Endpoint Configuration
|
||||
|
||||
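The trace forwarder sends spans to an OTLP endpoint over gRPC. By default, it uses `http://localhost:4317`. The forwarder's exporter is created with the gRPC (tonic) transport, so the HTTP ports listed below are shown for reference only.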
|
||||
|
||||
To specify a different endpoint, use the `--otlp-endpoint` flag:
|
||||
|
||||
```bash
|
||||
kata-trace-forwarder --otlp-endpoint http://my-collector:4317
|
||||
```
|
||||
|
||||
Common OTLP endpoints:
|
||||
|
||||
- Jaeger (v1.35+): `http://localhost:4317` (gRPC) or `http://localhost:4318` (HTTP)
|
||||
- OpenTelemetry Collector: `http://localhost:4317` (gRPC) or `http://localhost:4318` (HTTP)
|
||||
- Grafana Tempo: Depends on configuration
|
||||
|
||||
## Run
|
||||
|
||||
The way the trace forwarder is run depends on the configured hypervisor.
|
||||
@@ -39,7 +55,7 @@ To identify which hypervisor Kata is configured to use, either look in the
|
||||
configuration file, or run:
|
||||
|
||||
```bash
|
||||
$ kata-runtime env --json|jq '.Hypervisor.Path'
|
||||
kata-runtime env --json|jq '.Hypervisor.Path'
|
||||
```
|
||||
|
||||
### QEMU
|
||||
@@ -50,7 +66,7 @@ simply run the trace forwarder using the default options:
|
||||
#### Run the forwarder
|
||||
|
||||
```bash
|
||||
$ cargo run
|
||||
cargo run
|
||||
```
|
||||
|
||||
You can now proceed to create a Kata container as normal.
|
||||
@@ -102,14 +118,14 @@ it has been installed.
|
||||
##### Build
|
||||
|
||||
```bash
|
||||
$ make
|
||||
make
|
||||
```
|
||||
|
||||
##### Install
|
||||
|
||||
```bash
|
||||
$ cargo install --path .
|
||||
$ sudo install -o root -g root -m 0755 ~/.cargo/bin/kata-trace-forwarder /usr/local/bin
|
||||
cargo install --path .
|
||||
sudo install -o root -g root -m 0755 ~/.cargo/bin/kata-trace-forwarder /usr/local/bin
|
||||
```
|
||||
|
||||
#### Create sandbox directory
|
||||
@@ -119,9 +135,9 @@ the container (sandbox) you plan to create _after_ starting the trace
|
||||
forwarder.
|
||||
|
||||
```bash
|
||||
$ sandbox_id="foo"
|
||||
$ socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${sandbox_id}/g" | tr -d '"')
|
||||
$ sudo mkdir -p $(dirname "$socket_path")
|
||||
sandbox_id="foo"
|
||||
socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${sandbox_id}/g" | tr -d '"')
|
||||
sudo mkdir -p $(dirname "$socket_path")
|
||||
```
|
||||
|
||||
> **Note:** The `socket_path_template` variable was set in the
|
||||
@@ -130,7 +146,7 @@ $ sudo mkdir -p $(dirname "$socket_path")
|
||||
#### Run the forwarder specifying socket path
|
||||
|
||||
```bash
|
||||
$ sudo kata-trace-forwarder --socket-path "$socket_path"
|
||||
sudo kata-trace-forwarder --socket-path "$socket_path"
|
||||
```
|
||||
|
||||
You can now proceed as normal to create the "foo" Kata container.
|
||||
@@ -149,10 +165,9 @@ You can now proceed as normal to create the "foo" Kata container.
|
||||
For further information on how to run the trace forwarder, run:
|
||||
|
||||
```bash
|
||||
$ cargo run -- --help
|
||||
cargo run -- --help
|
||||
```
|
||||
|
||||
[agent-tracing]: /docs/tracing.md
|
||||
[jaeger-tracing]: https://www.jaegertracing.io
|
||||
[opentelemetry]: https://opentelemetry.io
|
||||
[vsock]: https://wiki.qemu.org/Features/VirtioVsock
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright (c) 2020-2021 Intel Corporation
|
||||
// Copyright (c) 2026 IBM Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
@@ -6,8 +7,7 @@
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use byteorder::{ByteOrder, NetworkEndian};
|
||||
use futures::executor::block_on;
|
||||
use opentelemetry::sdk::export::trace::{SpanData, SpanExporter};
|
||||
use slog::{debug, info, o, Logger};
|
||||
use slog::{debug, info, o, warn, Logger};
|
||||
use std::fs::File;
|
||||
use std::io::{ErrorKind, Read};
|
||||
use std::os::unix::io::{FromRawFd, RawFd};
|
||||
@@ -29,17 +29,16 @@ fn mk_io_err(msg: &str) -> std::io::Error {
|
||||
std::io::Error::other(msg.to_string())
|
||||
}
|
||||
|
||||
async fn handle_async_connection<'a>(
|
||||
async fn handle_async_connection(
|
||||
logger: Logger,
|
||||
mut conn: &'a mut dyn Read,
|
||||
exporter: &'a mut dyn SpanExporter,
|
||||
mut conn: &mut dyn Read,
|
||||
dump_only: bool,
|
||||
) -> Result<()> {
|
||||
let logger = logger.new(o!("subsystem" => "handler"));
|
||||
|
||||
debug!(logger, "handling connection");
|
||||
|
||||
handle_trace_data(logger.clone(), &mut conn, exporter, dump_only)
|
||||
handle_trace_data(logger.clone(), &mut conn, dump_only)
|
||||
.await
|
||||
.map_err(|e| mk_io_err(&format!("failed to handle data: {e:}")))?;
|
||||
|
||||
@@ -48,12 +47,7 @@ async fn handle_async_connection<'a>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_trace_data<'a>(
|
||||
logger: Logger,
|
||||
reader: &'a mut dyn Read,
|
||||
exporter: &'a mut dyn SpanExporter,
|
||||
dump_only: bool,
|
||||
) -> Result<()> {
|
||||
async fn handle_trace_data(logger: Logger, reader: &mut dyn Read, dump_only: bool) -> Result<()> {
|
||||
loop {
|
||||
let mut header: [u8; HEADER_SIZE_BYTES as usize] = [0; HEADER_SIZE_BYTES as usize];
|
||||
|
||||
@@ -81,24 +75,22 @@ async fn handle_trace_data<'a>(
|
||||
|
||||
debug!(logger, "read payload");
|
||||
|
||||
let span_data: SpanData =
|
||||
bincode::deserialize(&encoded_payload[..]).expect("failed to deserialise payload");
|
||||
|
||||
debug!(logger, "deserialised payload");
|
||||
// Note: In OpenTelemetry 0.27, SpanData no longer implements Deserialize.
|
||||
// The agent sends serialized SpanData, but we can't deserialize it directly.
|
||||
// For now, we log only the payload size in dump mode and otherwise warn about the
// incompatibility; the received span data is not exported to the collector.
|
||||
|
||||
if dump_only {
|
||||
debug!(logger, "dump-only: {:?}", span_data);
|
||||
debug!(
|
||||
logger,
|
||||
"dump-only: received {} bytes",
|
||||
encoded_payload.len()
|
||||
);
|
||||
} else {
|
||||
let batch = vec![span_data];
|
||||
|
||||
// Call low-level Jaeger exporter to send the trace span immediately.
|
||||
let result = exporter.export(batch).await;
|
||||
|
||||
if result.is_err() {
|
||||
return Err(anyhow!("failed to export trace spans: {:?}", result));
|
||||
}
|
||||
|
||||
debug!(logger, "exported trace spans");
|
||||
warn!(
|
||||
logger,
|
||||
"Received span data but cannot process: OpenTelemetry 0.27 SpanData is not deserializable. \
|
||||
Agent and forwarder OpenTelemetry versions may be incompatible."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,12 +100,12 @@ async fn handle_trace_data<'a>(
|
||||
pub fn handle_connection(
|
||||
logger: Logger,
|
||||
fd: RawFd,
|
||||
exporter: &mut dyn SpanExporter,
|
||||
_tracer: &opentelemetry_sdk::trace::Tracer,
|
||||
dump_only: bool,
|
||||
) -> Result<()> {
|
||||
let mut file = unsafe { File::from_raw_fd(fd) };
|
||||
|
||||
let conn = handle_async_connection(logger, &mut file, exporter, dump_only);
|
||||
let conn = handle_async_connection(logger, &mut file, dump_only);
|
||||
|
||||
block_on(conn)?;
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ use slog::{error, info, Logger};
|
||||
use std::env;
|
||||
use std::io;
|
||||
use std::process::exit;
|
||||
use tokio::runtime::Runtime;
|
||||
|
||||
// Traces will be created using this program name
|
||||
const DEFAULT_TRACE_NAME: &str = "kata-agent";
|
||||
@@ -24,8 +25,7 @@ const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
|
||||
// Must match the number used by the agent
|
||||
const DEFAULT_KATA_VSOCK_TRACING_PORT: &str = "10240";
|
||||
|
||||
const DEFAULT_JAEGER_HOST: &str = "127.0.0.1";
|
||||
const DEFAULT_JAEGER_PORT: &str = "6831";
|
||||
const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
|
||||
|
||||
mod handler;
|
||||
mod server;
|
||||
@@ -155,16 +155,10 @@ fn real_main() -> Result<()> {
|
||||
.default_value(DEFAULT_TRACE_NAME),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("jaeger-host")
|
||||
.long("jaeger-host")
|
||||
.help("Jaeger host address")
|
||||
.default_value(DEFAULT_JAEGER_HOST),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("jaeger-port")
|
||||
.long("jaeger-port")
|
||||
.help("Jaeger port number")
|
||||
.default_value(DEFAULT_JAEGER_PORT),
|
||||
Arg::new("otlp-endpoint")
|
||||
.long("otlp-endpoint")
|
||||
.help("OTLP endpoint URL (e.g., http://localhost:4317)")
|
||||
.default_value(DEFAULT_OTLP_ENDPOINT),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("log-level")
|
||||
@@ -244,35 +238,22 @@ fn real_main() -> Result<()> {
|
||||
)
|
||||
}?;
|
||||
|
||||
let jaeger_port: u32 = args
|
||||
.get_one::<String>("jaeger-port")
|
||||
let otlp_endpoint = args
|
||||
.get_one::<String>("otlp-endpoint")
|
||||
.map(|s| s.as_str())
|
||||
.ok_or("Need Jaeger port number")
|
||||
.map(|p| p.parse::<u32>().unwrap())
|
||||
.map_err(|e| anyhow!("Jaeger port number must be an integer: {:?}", e))?;
|
||||
|
||||
if jaeger_port == 0 {
|
||||
return Err(anyhow!("Jaeger port number cannot be zero"));
|
||||
}
|
||||
|
||||
let jaeger_host = args
|
||||
.get_one::<String>("jaeger-host")
|
||||
.map(|s| s.as_str())
|
||||
.ok_or("Need Jaeger host")
|
||||
.ok_or("Need OTLP endpoint")
|
||||
.map_err(|e: &str| anyhow!(e))?;
|
||||
|
||||
if jaeger_host.is_empty() {
|
||||
return Err(anyhow!("Jaeger host cannot be blank"));
|
||||
if otlp_endpoint.is_empty() {
|
||||
return Err(anyhow!("OTLP endpoint cannot be blank"));
|
||||
}
|
||||
|
||||
let server = server::VsockTraceServer::new(
|
||||
&logger,
|
||||
vsock,
|
||||
jaeger_host,
|
||||
jaeger_port,
|
||||
trace_name,
|
||||
dump_only,
|
||||
);
|
||||
// OTLP gRPC exporter uses hyper/tonic and requires a Tokio reactor context.
|
||||
let tokio_rt = Runtime::new().map_err(|e| anyhow!("failed to create tokio runtime: {e}"))?;
|
||||
let _tokio_enter_guard = tokio_rt.enter();
|
||||
|
||||
let server =
|
||||
server::VsockTraceServer::new(&logger, vsock, otlp_endpoint, trace_name, dump_only);
|
||||
|
||||
let result = server.start();
|
||||
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
// Copyright (c) 2020-2021 Intel Corporation
|
||||
// Copyright (c) 2026 IBM Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::handler;
|
||||
use anyhow::{anyhow, Result};
|
||||
use opentelemetry::sdk::export::trace::SpanExporter;
|
||||
use opentelemetry::trace::TracerProvider as _;
|
||||
use privdrop::PrivDrop;
|
||||
use slog::{debug, o, Logger};
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::os::unix::net::UnixListener;
|
||||
use std::sync::Arc;
|
||||
use vsock::{SockAddr, VsockListener};
|
||||
|
||||
use crate::tracer;
|
||||
@@ -30,9 +32,8 @@ pub enum VsockType {
|
||||
pub struct VsockTraceServer {
|
||||
pub vsock: VsockType,
|
||||
|
||||
pub jaeger_host: String,
|
||||
pub jaeger_port: u32,
|
||||
pub jaeger_service_name: String,
|
||||
pub otlp_endpoint: String,
|
||||
pub service_name: String,
|
||||
|
||||
pub logger: Logger,
|
||||
pub dump_only: bool,
|
||||
@@ -42,43 +43,42 @@ impl VsockTraceServer {
|
||||
pub fn new(
|
||||
logger: &Logger,
|
||||
vsock: VsockType,
|
||||
jaeger_host: &str,
|
||||
jaeger_port: u32,
|
||||
jaeger_service_name: &str,
|
||||
otlp_endpoint: &str,
|
||||
service_name: &str,
|
||||
dump_only: bool,
|
||||
) -> Self {
|
||||
let logger = logger.new(o!("subsystem" => "server"));
|
||||
|
||||
VsockTraceServer {
|
||||
vsock,
|
||||
jaeger_host: jaeger_host.to_string(),
|
||||
jaeger_port,
|
||||
jaeger_service_name: jaeger_service_name.to_string(),
|
||||
otlp_endpoint: otlp_endpoint.to_string(),
|
||||
service_name: service_name.to_string(),
|
||||
logger,
|
||||
dump_only,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) -> Result<()> {
|
||||
let result = tracer::create_jaeger_trace_exporter(
|
||||
self.jaeger_service_name.clone(),
|
||||
self.jaeger_host.clone(),
|
||||
self.jaeger_port,
|
||||
);
|
||||
let provider = tracer::create_otlp_trace_exporter(
|
||||
self.service_name.clone(),
|
||||
self.otlp_endpoint.clone(),
|
||||
)?;
|
||||
|
||||
let mut exporter = result?;
|
||||
// Get a tracer from the provider and wrap in Arc for sharing across connections
|
||||
let tracer = provider.tracer("kata-trace-forwarder");
|
||||
let shared_tracer = Arc::new(tracer);
|
||||
|
||||
match &self.vsock {
|
||||
VsockType::Standard { port, cid } => start_std_vsock(
|
||||
self.logger.clone(),
|
||||
&mut exporter,
|
||||
shared_tracer.clone(),
|
||||
*port,
|
||||
*cid,
|
||||
self.dump_only,
|
||||
),
|
||||
VsockType::Hybrid { socket_path } => start_hybrid_vsock(
|
||||
self.logger.clone(),
|
||||
&mut exporter,
|
||||
shared_tracer,
|
||||
socket_path,
|
||||
self.dump_only,
|
||||
),
|
||||
@@ -102,7 +102,7 @@ fn drop_privs(logger: &Logger) -> Result<()> {
|
||||
|
||||
fn start_hybrid_vsock(
|
||||
logger: Logger,
|
||||
exporter: &mut dyn SpanExporter,
|
||||
tracer: Arc<opentelemetry_sdk::trace::Tracer>,
|
||||
socket_path: &str,
|
||||
dump_only: bool,
|
||||
) -> Result<()> {
|
||||
@@ -130,7 +130,7 @@ fn start_hybrid_vsock(
|
||||
|
||||
let fd = conn.as_raw_fd();
|
||||
|
||||
handler::handle_connection(logger.clone(), fd, exporter, dump_only)?;
|
||||
handler::handle_connection(logger.clone(), fd, &tracer, dump_only)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -138,7 +138,7 @@ fn start_hybrid_vsock(
|
||||
|
||||
fn start_std_vsock(
|
||||
logger: Logger,
|
||||
exporter: &mut dyn SpanExporter,
|
||||
tracer: Arc<opentelemetry_sdk::trace::Tracer>,
|
||||
port: u32,
|
||||
cid: u32,
|
||||
dump_only: bool,
|
||||
@@ -156,7 +156,7 @@ fn start_std_vsock(
|
||||
|
||||
let fd = conn.as_raw_fd();
|
||||
|
||||
handler::handle_connection(logger.clone(), fd, exporter, dump_only)?;
|
||||
handler::handle_connection(logger.clone(), fd, &tracer, dump_only)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,44 +1,43 @@
|
||||
// Copyright (c) 2020-2021 Intel Corporation
|
||||
// Copyright (c) 2026 IBM Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use opentelemetry::KeyValue;
|
||||
use std::net::SocketAddr;
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use opentelemetry_sdk::trace::TracerProvider;
|
||||
use opentelemetry_sdk::Resource;
|
||||
|
||||
pub fn create_jaeger_trace_exporter(
|
||||
jaeger_service_name: String,
|
||||
jaeger_host: String,
|
||||
jaeger_port: u32,
|
||||
) -> Result<opentelemetry_jaeger::Exporter, std::io::Error> {
|
||||
let exporter_type = "jaeger";
|
||||
pub fn create_otlp_trace_exporter(
|
||||
service_name: String,
|
||||
otlp_endpoint: String,
|
||||
) -> Result<TracerProvider, std::io::Error> {
|
||||
let exporter_type = "otlp";
|
||||
|
||||
let jaeger_addr = format!("{jaeger_host}:{jaeger_port}");
|
||||
|
||||
let socket_addr: SocketAddr = match jaeger_addr.parse() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return Err(std::io::Error::other(format!(
|
||||
"failed to parse Jaeger address: {:?}",
|
||||
e.to_string()
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let exporter = match opentelemetry_jaeger::new_pipeline()
|
||||
.with_service_name(jaeger_service_name)
|
||||
.with_agent_endpoint(socket_addr.to_string())
|
||||
.with_tags(vec![KeyValue::new("exporter", exporter_type)])
|
||||
.init_exporter()
|
||||
// Create OTLP exporter
|
||||
let exporter = match opentelemetry_otlp::SpanExporter::builder()
|
||||
.with_tonic()
|
||||
.with_endpoint(otlp_endpoint)
|
||||
.build()
|
||||
{
|
||||
Ok(x) => x,
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
return Err(std::io::Error::other(format!(
|
||||
"failed to create exporter: {:?}",
|
||||
e.to_string()
|
||||
"failed to create OTLP exporter: {:?}",
|
||||
e
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(exporter)
|
||||
// Create tracer provider with resource attributes
|
||||
let provider = TracerProvider::builder()
|
||||
.with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
|
||||
.with_resource(Resource::new(vec![
|
||||
KeyValue::new("service.name", service_name),
|
||||
KeyValue::new("exporter", exporter_type),
|
||||
]))
|
||||
.build();
|
||||
|
||||
Ok(provider)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user