mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-06 11:31:12 +00:00
feat(core): Support opentelemetry exporter (#1690)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from dbgpt.util.tracer.base import (
|
||||
DBGPT_TRACER_SPAN_ID,
|
||||
Span,
|
||||
SpanStorage,
|
||||
SpanStorageType,
|
||||
@@ -28,6 +29,7 @@ __all__ = [
|
||||
"SpanStorage",
|
||||
"SpanStorageType",
|
||||
"TracerContext",
|
||||
"DBGPT_TRACER_SPAN_ID",
|
||||
"MemorySpanStorage",
|
||||
"FileSpanStorage",
|
||||
"SpanStorageContainer",
|
||||
|
@@ -1,15 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import secrets
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from dbgpt.component import BaseComponent, ComponentType, SystemApp
|
||||
|
||||
DBGPT_TRACER_SPAN_ID = "DB-GPT-Trace-Span-Id"
|
||||
|
||||
# Compatibility with OpenTelemetry API
|
||||
_TRACE_ID_MAX_VALUE = 2**128 - 1
|
||||
_SPAN_ID_MAX_VALUE = 2**64 - 1
|
||||
INVALID_SPAN_ID = 0x0000000000000000
|
||||
INVALID_TRACE_ID = 0x00000000000000000000000000000000
|
||||
|
||||
|
||||
class SpanType(str, Enum):
|
||||
BASE = "base"
|
||||
@@ -60,7 +69,7 @@ class Span:
|
||||
# Timestamp when this span ended, initially None
|
||||
self.end_time = None
|
||||
# Additional metadata associated with the span
|
||||
self.metadata = metadata
|
||||
self.metadata = metadata or {}
|
||||
self._end_callers = []
|
||||
if end_caller:
|
||||
self._end_callers.append(end_caller)
|
||||
@@ -91,13 +100,17 @@ class Span:
|
||||
"span_id": self.span_id,
|
||||
"parent_span_id": self.parent_span_id,
|
||||
"operation_name": self.operation_name,
|
||||
"start_time": None
|
||||
if not self.start_time
|
||||
else self.start_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
|
||||
"end_time": None
|
||||
if not self.end_time
|
||||
else self.end_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
|
||||
"metadata": _clean_for_json(self.metadata),
|
||||
"start_time": (
|
||||
None
|
||||
if not self.start_time
|
||||
else self.start_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||
),
|
||||
"end_time": (
|
||||
None
|
||||
if not self.end_time
|
||||
else self.end_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||
),
|
||||
"metadata": _clean_for_json(self.metadata) if self.metadata else None,
|
||||
}
|
||||
|
||||
def copy(self) -> Span:
|
||||
@@ -200,6 +213,60 @@ class Tracer(BaseComponent, ABC):
|
||||
"""
|
||||
return str(uuid.uuid4())
|
||||
|
||||
def _new_random_trace_id(self) -> str:
    """Return a freshly generated random trace ID.

    Instance-level entry point that forwards to the module-level
    ``_new_random_trace_id`` function and returns its result unchanged.
    """
    trace_id = _new_random_trace_id()
    return trace_id
|
||||
|
||||
def _new_random_span_id(self) -> str:
    """Return a freshly generated random span ID.

    Instance-level entry point that forwards to the module-level
    ``_new_random_span_id`` function and returns its result unchanged.
    """
    span_id = _new_random_span_id()
    return span_id
|
||||
|
||||
|
||||
def _new_random_trace_id() -> str:
    """Generate a random trace ID.

    Returns:
        str: 32 lowercase hexadecimal characters encoding 16 random bytes
            (128 bits) obtained from the ``secrets`` module.
    """
    random_bytes = secrets.token_bytes(16)
    return random_bytes.hex()
|
||||
|
||||
|
||||
def _is_valid_trace_id(trace_id: Union[str, int]) -> bool:
    """Check whether ``trace_id`` falls inside the valid trace ID range.

    Args:
        trace_id: The ID as an integer, or as a string that is parsed as a
            base-16 number.

    Returns:
        bool: True when the numeric value is greater than ``INVALID_TRACE_ID``
            and not greater than ``_TRACE_ID_MAX_VALUE``. False otherwise,
            including when a string cannot be parsed as hexadecimal.
    """
    numeric_id = trace_id
    if isinstance(numeric_id, str):
        try:
            numeric_id = int(numeric_id, 16)
        except ValueError:
            return False
    numeric_id = int(numeric_id)
    if numeric_id <= INVALID_TRACE_ID:
        return False
    return numeric_id <= _TRACE_ID_MAX_VALUE
|
||||
|
||||
|
||||
def _new_random_span_id() -> str:
    """Generate a random span ID.

    Returns:
        str: 16 lowercase hexadecimal characters encoding 8 random bytes
            (64 bits) obtained from the ``secrets`` module.
    """
    random_bytes = secrets.token_bytes(8)
    return random_bytes.hex()
|
||||
|
||||
|
||||
def _is_valid_span_id(span_id: Union[str, int]) -> bool:
    """Check whether ``span_id`` falls inside the valid span ID range.

    Args:
        span_id: The ID as an integer, or as a string that is parsed as a
            base-16 number.

    Returns:
        bool: True when the numeric value is greater than ``INVALID_SPAN_ID``
            and not greater than ``_SPAN_ID_MAX_VALUE``. False otherwise,
            including when a string cannot be parsed as hexadecimal.
    """
    numeric_id = span_id
    if isinstance(numeric_id, str):
        try:
            numeric_id = int(numeric_id, 16)
        except ValueError:
            return False
    numeric_id = int(numeric_id)
    if numeric_id <= INVALID_SPAN_ID:
        return False
    return numeric_id <= _SPAN_ID_MAX_VALUE
|
||||
|
||||
|
||||
def _split_span_id(span_id: str) -> Tuple[int, int]:
    """Split a composite ``"<trace_id>:<span_id>"`` string into two integers.

    Both halves are parsed as base-16 numbers.

    Args:
        span_id: Composite identifier expected to contain exactly one ``":"``.

    Returns:
        Tuple[int, int]: ``(trace_id, span_id)`` as integers, or ``(0, 0)``
            when the input does not contain exactly one ``":"`` or either half
            cannot be parsed as hexadecimal.
    """
    # Exactly one separator is the same as "splitting on ':' yields two parts".
    if span_id.count(":") != 1:
        return 0, 0
    trace_hex, _, span_hex = span_id.partition(":")
    try:
        return int(trace_hex, 16), int(span_hex, 16)
    except ValueError:
        return 0, 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class TracerContext:
|
||||
@@ -240,3 +307,28 @@ def _clean_for_json(data: Optional[str, Any] = None):
|
||||
return data
|
||||
except TypeError:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_span_id(body: Any) -> Optional[str]:
    """Extract and validate a parent span ID from a request-like object.

    Args:
        body: A starlette ``Request`` (the ``DBGPT_TRACER_SPAN_ID`` header is
            read), or a ``dict`` / pydantic ``BaseModel`` (the
            ``DBGPT_TRACER_SPAN_ID`` key is tried first, then ``"span_id"``).
            Any other type yields ``None``.

    Returns:
        Optional[str]: The span ID string, unchanged, when it has the form
            ``"<trace_id>:<span_id>"`` with both halves hexadecimal and inside
            the valid trace/span ID ranges; otherwise ``None``.
    """
    # These names are only needed by this function, so they are imported here.
    from starlette.requests import Request

    from dbgpt._private.pydantic import BaseModel, model_to_dict

    span_id: Optional[str] = None
    if isinstance(body, Request):
        span_id = body.headers.get(DBGPT_TRACER_SPAN_ID)
    elif isinstance(body, dict):
        span_id = body.get(DBGPT_TRACER_SPAN_ID) or body.get("span_id")
    elif isinstance(body, BaseModel):
        dict_body = model_to_dict(body)
        span_id = dict_body.get(DBGPT_TRACER_SPAN_ID) or dict_body.get("span_id")

    # A dict or model may carry a non-string value under these keys; only a
    # non-empty string can be a composite span ID.
    if not span_id or not isinstance(span_id, str):
        return None

    int_trace_id, int_span_id = _split_span_id(span_id)
    if not int_trace_id:
        return None
    # Reject IDs outside the valid ranges instead of passing them on to the
    # tracer as a parent span ID.
    if _is_valid_span_id(int_span_id) and _is_valid_trace_id(int_trace_id):
        return span_id
    return None
|
||||
|
122
dbgpt/util/tracer/opentelemetry.py
Normal file
122
dbgpt/util/tracer/opentelemetry.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from .base import Span, SpanStorage, _split_span_id
|
||||
|
||||
# The OpenTelemetry packages are optional dependencies; fail with installation
# instructions when any of them is missing.
try:
    from opentelemetry import trace
    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.resources import Resource
    from opentelemetry.sdk.trace import Span as OTSpan
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.trace import SpanContext, SpanKind
except ImportError as exc:
    # Keep the underlying import failure chained so the missing module stays
    # visible in the traceback.
    raise ImportError(
        "To use OpenTelemetrySpanStorage, you must install opentelemetry-api, "
        "opentelemetry-sdk and opentelemetry-exporter-otlp. "
        "You can install it via `pip install opentelemetry-api opentelemetry-sdk "
        "opentelemetry-exporter-otlp`"
    ) from exc
|
||||
|
||||
|
||||
class OpenTelemetrySpanStorage(SpanStorage):
    """Span storage that mirrors DB-GPT spans as OpenTelemetry spans.

    Each DB-GPT ``Span`` handed to :meth:`append_span` is re-created on a
    dedicated ``TracerProvider`` whose spans go through a
    ``BatchSpanProcessor`` wrapping an ``OTLPSpanExporter``.
    """

    # Value types copied onto OpenTelemetry spans as attributes. A list is
    # accepted when every element is one of these types.
    _ATTRIBUTE_TYPES = (bool, str, bytes, int, float)

    def __init__(
        self,
        service_name: str,
        otlp_endpoint: Optional[str] = None,
        otlp_insecure: Optional[bool] = None,
        otlp_timeout: Optional[int] = None,
    ):
        """Create the tracer provider and attach an OTLP exporter to it.

        Args:
            service_name: Value of the ``service.name`` resource attribute.
            otlp_endpoint: Passed to ``OTLPSpanExporter`` as ``endpoint``.
            otlp_insecure: Passed to ``OTLPSpanExporter`` as ``insecure``.
            otlp_timeout: Passed to ``OTLPSpanExporter`` as ``timeout``.
        """
        super().__init__()
        self.service_name = service_name

        resource = Resource(attributes={"service.name": service_name})
        self.tracer_provider = TracerProvider(resource=resource)
        self.tracer = self.tracer_provider.get_tracer(__name__)
        # Store the spans that have not ended
        self.spans: Dict[str, OTSpan] = {}
        otlp_exporter = OTLPSpanExporter(
            endpoint=otlp_endpoint,
            insecure=otlp_insecure,
            timeout=otlp_timeout,
        )
        span_processor = BatchSpanProcessor(otlp_exporter)
        self.tracer_provider.add_span_processor(span_processor)
        # Register this provider through the OpenTelemetry API's global setter.
        trace.set_tracer_provider(self.tracer_provider)

    def append_span(self, span: Span):
        """Mirror a DB-GPT span as an OpenTelemetry span.

        The first time a span ID is seen, an OpenTelemetry span is started.
        If the DB-GPT span has not ended yet, the OpenTelemetry span is kept
        in ``self.spans`` until the same span ID is appended again; it is then
        updated with the latest metadata and ended. A span that is already
        finished when first seen is started and ended immediately, so it is
        not left open forever.

        Args:
            span: The DB-GPT span to mirror.
        """
        span_id = span.span_id

        pending = self.spans.pop(span_id, None)
        if pending is not None:
            # Second sighting: refresh the attributes, then close the span.
            self._set_metadata_attributes(pending, span.metadata)
            self._end_otel_span(pending, span)
            return

        otel_span = self._start_otel_span(span)
        if span.end_time:
            # Already finished: nothing will end it later, so end it now.
            self._end_otel_span(otel_span, span)
        else:
            self.spans[span_id] = otel_span

    def append_span_batch(self, spans: List[Span]):
        """Append every span in ``spans`` in order via :meth:`append_span`."""
        for span in spans:
            self.append_span(span)

    def _start_otel_span(self, span: Span):
        """Start an OpenTelemetry span carrying the DB-GPT span's data."""
        parent_context = self._create_parent_context(span)
        otel_span = self.tracer.start_span(
            span.operation_name,
            context=parent_context,
            kind=SpanKind.INTERNAL,
            # Datetime -> int nanoseconds
            start_time=self._to_nanos(span.start_time),
        )

        # The DB-GPT identifiers are recorded as attributes only.
        # NOTE(review): the OpenTelemetry span's own IDs are not taken from
        # span.trace_id / span.span_id here, while child spans build their
        # parent context from the DB-GPT IDs - confirm parent/child linkage
        # in the tracing backend.
        otel_span.set_attribute("dbgpt_trace_id", span.trace_id)
        otel_span.set_attribute("dbgpt_span_id", span.span_id)
        if span.parent_span_id:
            otel_span.set_attribute("dbgpt_parent_span_id", span.parent_span_id)
        otel_span.set_attribute("span_type", span.span_type.value)
        self._set_metadata_attributes(otel_span, span.metadata)
        return otel_span

    def _end_otel_span(self, otel_span, span: Span) -> None:
        """End ``otel_span`` at the DB-GPT span's end time, if it has one."""
        end_time = self._to_nanos(span.end_time) if span.end_time else None
        if end_time:
            otel_span.end(end_time=end_time)
        else:
            otel_span.end()

    def _set_metadata_attributes(self, otel_span, metadata) -> None:
        """Copy supported metadata entries onto ``otel_span`` as attributes."""
        if not metadata:
            return
        for key, value in metadata.items():
            if self._is_attribute_value(value):
                otel_span.set_attribute(key, value)

    @classmethod
    def _is_attribute_value(cls, value: object) -> bool:
        """Tell whether ``value`` is a primitive or a list of primitives."""
        if isinstance(value, cls._ATTRIBUTE_TYPES):
            return True
        return isinstance(value, list) and all(
            isinstance(item, cls._ATTRIBUTE_TYPES) for item in value
        )

    @staticmethod
    def _to_nanos(moment) -> int:
        """Convert a ``datetime`` to integer nanoseconds since the epoch."""
        return int(moment.timestamp() * 1e9)

    def _create_parent_context(self, span: Span):
        """Build the OpenTelemetry context used as the new span's parent.

        Returns a context holding ``trace.INVALID_SPAN`` when the DB-GPT span
        has no parent or its parent ID cannot be split into a trace ID and a
        span ID; otherwise a context holding a non-recording span that
        carries the parent's IDs.
        """
        if not span.parent_span_id:
            return trace.set_span_in_context(trace.INVALID_SPAN)

        trace_id, parent_span_id = _split_span_id(span.parent_span_id)
        if not trace_id:
            return trace.set_span_in_context(trace.INVALID_SPAN)

        span_context = SpanContext(
            trace_id=trace_id,
            span_id=parent_span_id,
            is_remote=True,
            trace_flags=trace.TraceFlags(0x01),  # Default: SAMPLED
        )
        return trace.set_span_in_context(trace.NonRecordingSpan(span_context))

    def close(self):
        """Shut down the tracer provider."""
        self.tracer_provider.shutdown()
|
@@ -249,7 +249,7 @@ def chat(
|
||||
for sp in spans:
|
||||
span_type = sp["span_type"]
|
||||
metadata = sp.get("metadata")
|
||||
if span_type == SpanType.RUN:
|
||||
if span_type == SpanType.RUN and metadata and "run_service" in metadata:
|
||||
service_name = metadata["run_service"]
|
||||
service_spans[service_name] = sp.copy()
|
||||
if set(service_spans.keys()) == service_names and found_trace_id:
|
||||
|
@@ -3,7 +3,7 @@ import inspect
|
||||
import logging
|
||||
from contextvars import ContextVar
|
||||
from functools import wraps
|
||||
from typing import Dict, Optional
|
||||
from typing import Any, AsyncIterator, Dict, Optional
|
||||
|
||||
from dbgpt.component import ComponentType, SystemApp
|
||||
from dbgpt.util.module_utils import import_from_checked_string
|
||||
@@ -46,9 +46,12 @@ class DefaultTracer(Tracer):
|
||||
metadata: Dict = None,
|
||||
) -> Span:
|
||||
trace_id = (
|
||||
self._new_uuid() if parent_span_id is None else parent_span_id.split(":")[0]
|
||||
self._new_random_trace_id()
|
||||
if parent_span_id is None
|
||||
else parent_span_id.split(":")[0]
|
||||
)
|
||||
span_id = f"{trace_id}:{self._new_uuid()}"
|
||||
span_id = f"{trace_id}:{self._new_random_span_id()}"
|
||||
|
||||
span = Span(
|
||||
trace_id,
|
||||
span_id,
|
||||
@@ -164,6 +167,33 @@ class TracerManager:
|
||||
current_span = self.get_current_span()
|
||||
return current_span.span_type if current_span else None
|
||||
|
||||
def _parse_span_id(self, body: Any) -> Optional[str]:
    """Extract a span ID from ``body`` via ``_parse_span_id`` in ``.base``.

    Args:
        body: Object handed unchanged to the helper.

    Returns:
        Optional[str]: Whatever the helper returns for ``body``.
    """
    from .base import _parse_span_id as parse_from_body

    return parse_from_body(body)
|
||||
|
||||
def wrapper_async_stream(
    self,
    generator: AsyncIterator[Any],
    operation_name: str,
    parent_span_id: str = None,
    span_type: SpanType = None,
    metadata: Dict = None,
) -> AsyncIterator[Any]:
    """Return an async iterator that relays ``generator`` inside a span.

    The parent span ID is resolved when this method is called, falling back
    to the current span ID when none is given. The span itself is started
    only once the returned iterator is first advanced, and it is ended in a
    ``finally`` clause around the relay loop.

    Args:
        generator: Async iterator whose items are yielded unchanged.
        operation_name: Operation name given to the new span.
        parent_span_id: Parent span ID; the current span ID is used if falsy.
        span_type: Span type forwarded to ``start_span``.
        metadata: Metadata forwarded to ``start_span``.

    Returns:
        AsyncIterator[Any]: The wrapping async generator.
    """
    if not parent_span_id:
        parent_span_id = self.get_current_span_id()

    async def _traced_stream():
        active_span = self.start_span(
            operation_name, parent_span_id, span_type, metadata
        )
        try:
            async for chunk in generator:
                yield chunk
        finally:
            active_span.end()

    return _traced_stream()
|
||||
|
||||
|
||||
root_tracer: TracerManager = TracerManager()
|
||||
|
||||
@@ -206,10 +236,14 @@ def _parse_operation_name(func, *args):
|
||||
|
||||
def initialize_tracer(
|
||||
tracer_filename: str,
|
||||
root_operation_name: str = "DB-GPT-Web-Entry",
|
||||
root_operation_name: str = "DB-GPT-Webserver",
|
||||
system_app: Optional[SystemApp] = None,
|
||||
tracer_storage_cls: Optional[str] = None,
|
||||
create_system_app: bool = False,
|
||||
enable_open_telemetry: bool = False,
|
||||
otlp_endpoint: Optional[str] = None,
|
||||
otlp_insecure: Optional[bool] = None,
|
||||
otlp_timeout: Optional[int] = None,
|
||||
):
|
||||
"""Initialize the tracer with the given filename and system app."""
|
||||
from dbgpt.util.tracer.span_storage import FileSpanStorage, SpanStorageContainer
|
||||
@@ -227,6 +261,17 @@ def initialize_tracer(
|
||||
|
||||
storage_container = SpanStorageContainer(system_app)
|
||||
storage_container.append_storage(FileSpanStorage(tracer_filename))
|
||||
if enable_open_telemetry:
|
||||
from dbgpt.util.tracer.opentelemetry import OpenTelemetrySpanStorage
|
||||
|
||||
storage_container.append_storage(
|
||||
OpenTelemetrySpanStorage(
|
||||
service_name=root_operation_name,
|
||||
otlp_endpoint=otlp_endpoint,
|
||||
otlp_insecure=otlp_insecure,
|
||||
otlp_timeout=otlp_timeout,
|
||||
)
|
||||
)
|
||||
|
||||
if tracer_storage_cls:
|
||||
logger.info(f"Begin parse storage class {tracer_storage_cls}")
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import uuid
|
||||
import logging
|
||||
from contextvars import ContextVar
|
||||
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
@@ -7,7 +7,11 @@ from starlette.types import ASGIApp
|
||||
|
||||
from dbgpt.util.tracer import Tracer, TracerContext
|
||||
|
||||
_DEFAULT_EXCLUDE_PATHS = ["/api/controller/heartbeat"]
|
||||
from .base import _parse_span_id
|
||||
|
||||
_DEFAULT_EXCLUDE_PATHS = ["/api/controller/heartbeat", "/api/health"]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TraceIDMiddleware(BaseHTTPMiddleware):
|
||||
@@ -33,11 +37,12 @@ class TraceIDMiddleware(BaseHTTPMiddleware):
|
||||
):
|
||||
return await call_next(request)
|
||||
|
||||
span_id = request.headers.get("DBGPT_TRACER_SPAN_ID")
|
||||
# if not span_id:
|
||||
# span_id = str(uuid.uuid4())
|
||||
# self.trace_context_var.set(TracerContext(span_id=span_id))
|
||||
|
||||
# Read trace_id from request headers
|
||||
span_id = _parse_span_id(request)
|
||||
logger.debug(
|
||||
f"TraceIDMiddleware: span_id={span_id}, path={request.url.path}, "
|
||||
f"headers={request.headers}"
|
||||
)
|
||||
with self.tracer.start_span(
|
||||
self.root_operation_name, span_id, metadata={"path": request.url.path}
|
||||
):
|
||||
|
Reference in New Issue
Block a user