feat(core): Support OpenTelemetry exporter (#1690)

This commit is contained in:
Fangyin Cheng
2024-07-05 15:20:21 +08:00
committed by GitHub
parent 84fc1fc7fe
commit bf978d2bf9
39 changed files with 1176 additions and 218 deletions

View File

@@ -1,4 +1,5 @@
from dbgpt.util.tracer.base import (
DBGPT_TRACER_SPAN_ID,
Span,
SpanStorage,
SpanStorageType,
@@ -28,6 +29,7 @@ __all__ = [
"SpanStorage",
"SpanStorageType",
"TracerContext",
"DBGPT_TRACER_SPAN_ID",
"MemorySpanStorage",
"FileSpanStorage",
"SpanStorageContainer",

View File

@@ -1,15 +1,24 @@
from __future__ import annotations
import json
import secrets
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from dbgpt.component import BaseComponent, ComponentType, SystemApp
# Header name / payload key under which a DB-GPT span ID is looked up
DBGPT_TRACER_SPAN_ID = "DB-GPT-Trace-Span-Id"
# Compatibility with OpenTelemetry API
# Largest value representable in 128 bits (upper bound for a trace ID)
_TRACE_ID_MAX_VALUE = 2**128 - 1
# Largest value representable in 64 bits (upper bound for a span ID)
_SPAN_ID_MAX_VALUE = 2**64 - 1
# All-zero IDs mark an invalid span / trace; valid IDs must be strictly greater
INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000
class SpanType(str, Enum):
BASE = "base"
@@ -60,7 +69,7 @@ class Span:
# Timestamp when this span ended, initially None
self.end_time = None
# Additional metadata associated with the span
self.metadata = metadata
self.metadata = metadata or {}
self._end_callers = []
if end_caller:
self._end_callers.append(end_caller)
@@ -91,13 +100,17 @@ class Span:
"span_id": self.span_id,
"parent_span_id": self.parent_span_id,
"operation_name": self.operation_name,
"start_time": None
if not self.start_time
else self.start_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
"end_time": None
if not self.end_time
else self.end_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
"metadata": _clean_for_json(self.metadata),
"start_time": (
None
if not self.start_time
else self.start_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
),
"end_time": (
None
if not self.end_time
else self.end_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
),
"metadata": _clean_for_json(self.metadata) if self.metadata else None,
}
def copy(self) -> Span:
@@ -200,6 +213,60 @@ class Tracer(BaseComponent, ABC):
"""
return str(uuid.uuid4())
def _new_random_trace_id(self) -> str:
    """Return a freshly generated random trace ID.

    Delegates to the module-level ``_new_random_trace_id`` helper.
    """
    trace_id = _new_random_trace_id()
    return trace_id
def _new_random_span_id(self) -> str:
    """Return a freshly generated random span ID.

    Delegates to the module-level ``_new_random_span_id`` helper.
    """
    span_id = _new_random_span_id()
    return span_id
def _new_random_trace_id() -> str:
"""Create a new random trace ID."""
# Generate a 128-bit hex string
return secrets.token_hex(16)
def _is_valid_trace_id(trace_id: Union[str, int]) -> bool:
    """Check whether ``trace_id`` lies in the valid trace ID range.

    Args:
        trace_id: The trace ID, either as an integer or as a hexadecimal string.

    Returns:
        bool: True when the numeric value is greater than ``INVALID_TRACE_ID``
            and at most ``_TRACE_ID_MAX_VALUE``; False otherwise, including when
            a string cannot be parsed as hexadecimal.
    """
    numeric_id = trace_id
    if isinstance(trace_id, str):
        try:
            numeric_id = int(trace_id, 16)
        except ValueError:
            # Not a hexadecimal string
            return False
    numeric_id = int(numeric_id)
    return numeric_id > INVALID_TRACE_ID and numeric_id <= _TRACE_ID_MAX_VALUE
def _new_random_span_id() -> str:
"""Create a new random span ID."""
# Generate a 64-bit hex string
return secrets.token_hex(8)
def _is_valid_span_id(span_id: Union[str, int]) -> bool:
    """Check whether ``span_id`` lies in the valid span ID range.

    Args:
        span_id: The span ID, either as an integer or as a hexadecimal string.

    Returns:
        bool: True when the numeric value is greater than ``INVALID_SPAN_ID``
            and at most ``_SPAN_ID_MAX_VALUE``; False otherwise, including when
            a string cannot be parsed as hexadecimal.
    """
    numeric_id = span_id
    if isinstance(span_id, str):
        try:
            numeric_id = int(span_id, 16)
        except ValueError:
            # Not a hexadecimal string
            return False
    numeric_id = int(numeric_id)
    return numeric_id > INVALID_SPAN_ID and numeric_id <= _SPAN_ID_MAX_VALUE
def _split_span_id(span_id: str) -> Tuple[int, int]:
parent_span_id_parts = span_id.split(":")
if len(parent_span_id_parts) != 2:
return 0, 0
trace_id, parent_span_id = parent_span_id_parts
try:
trace_id = int(trace_id, 16)
span_id = int(parent_span_id, 16)
return trace_id, span_id
except ValueError:
return 0, 0
@dataclass
class TracerContext:
@@ -240,3 +307,28 @@ def _clean_for_json(data: Optional[str, Any] = None):
return data
except TypeError:
return None
def _parse_span_id(body: Any) -> Optional[str]:
    """Extract a DB-GPT span ID from ``body`` and validate it.

    The ID is looked up under ``DBGPT_TRACER_SPAN_ID`` in the headers of a
    starlette ``Request``, or under ``DBGPT_TRACER_SPAN_ID`` / ``"span_id"`` in a
    dict or a pydantic model (converted with ``model_to_dict``).

    Args:
        body: A starlette ``Request``, a dict or a pydantic ``BaseModel``. Any
            other type yields ``None``.

    Returns:
        Optional[str]: The original ``"<trace_id>:<span_id>"`` string when both
            parts are hexadecimal and within the valid trace/span ID ranges,
            otherwise ``None``.
    """
    # Imported lazily inside the function, as in the original code
    from starlette.requests import Request

    from dbgpt._private.pydantic import BaseModel, model_to_dict

    span_id: Optional[str] = None
    if isinstance(body, Request):
        span_id = body.headers.get(DBGPT_TRACER_SPAN_ID)
    elif isinstance(body, dict):
        span_id = body.get(DBGPT_TRACER_SPAN_ID) or body.get("span_id")
    elif isinstance(body, BaseModel):
        dict_body = model_to_dict(body)
        span_id = dict_body.get(DBGPT_TRACER_SPAN_ID) or dict_body.get("span_id")

    # Payload values are untyped; anything that is not a non-empty string
    # cannot be a "<trace_id>:<span_id>" pair.
    if not span_id or not isinstance(span_id, str):
        return None

    int_trace_id, int_span_id = _split_span_id(span_id)
    # Previously both outcomes of this check returned span_id, so IDs outside
    # the valid ranges were passed through. Reject them instead.
    if not (_is_valid_trace_id(int_trace_id) and _is_valid_span_id(int_span_id)):
        return None
    return span_id

View File

@@ -0,0 +1,122 @@
from typing import Dict, List, Optional
from .base import Span, SpanStorage, _split_span_id
# The OpenTelemetry packages are an optional dependency; fail with an
# actionable message when they are missing.
try:
    from opentelemetry import trace
    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.resources import Resource
    from opentelemetry.sdk.trace import Span as OTSpan
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.trace import SpanContext, SpanKind
except ImportError as e:
    # Chain the original error so the missing module name stays visible
    raise ImportError(
        "To use OpenTelemetrySpanStorage, you must install opentelemetry-api, "
        "opentelemetry-sdk and opentelemetry-exporter-otlp. "
        "You can install it via `pip install opentelemetry-api opentelemetry-sdk "
        "opentelemetry-exporter-otlp`"
    ) from e
class OpenTelemetrySpanStorage(SpanStorage):
    """OpenTelemetry span storage.

    Mirrors DB-GPT spans as OpenTelemetry spans and hands them to an OTLP gRPC
    exporter through a ``BatchSpanProcessor``.

    ``append_span`` may be called more than once for the same span: the first
    call starts the OpenTelemetry span, a later call with the same ``span_id``
    updates its attributes and ends it.

    NOTE(review): the OpenTelemetry span is created by the SDK tracer, so its
    own trace/span IDs presumably differ from the DB-GPT IDs (which are kept as
    ``dbgpt_*`` attributes) -- confirm how parent/child correlation shows up in
    the backend.
    """

    # Value types copied from span metadata to span attributes, either directly
    # or as the elements of a list
    _ATTRIBUTE_SCALAR_TYPES = (bool, str, bytes, int, float)

    def __init__(
        self,
        service_name: str,
        otlp_endpoint: Optional[str] = None,
        otlp_insecure: Optional[bool] = None,
        otlp_timeout: Optional[int] = None,
    ):
        """Create the tracer provider and register the OTLP exporter.

        Args:
            service_name: Value of the ``service.name`` resource attribute.
            otlp_endpoint: Passed to ``OTLPSpanExporter`` as ``endpoint``.
            otlp_insecure: Passed to ``OTLPSpanExporter`` as ``insecure``.
            otlp_timeout: Passed to ``OTLPSpanExporter`` as ``timeout``.
        """
        super().__init__()
        self.service_name = service_name

        resource = Resource(attributes={"service.name": service_name})
        self.tracer_provider = TracerProvider(resource=resource)
        self.tracer = self.tracer_provider.get_tracer(__name__)
        # Store the spans that have not ended
        self.spans: Dict[str, OTSpan] = {}
        otlp_exporter = OTLPSpanExporter(
            endpoint=otlp_endpoint,
            insecure=otlp_insecure,
            timeout=otlp_timeout,
        )
        span_processor = BatchSpanProcessor(otlp_exporter)
        self.tracer_provider.add_span_processor(span_processor)
        # Also installs this provider as the process-wide tracer provider
        trace.set_tracer_provider(self.tracer_provider)

    def append_span(self, span: Span):
        """Start or finish the OpenTelemetry span that mirrors ``span``.

        Args:
            span: The DB-GPT span. If its ``span_id`` is already tracked, the
                tracked OpenTelemetry span gets the current metadata and is
                ended. Otherwise a new OpenTelemetry span is started; it is
                ended immediately when ``span`` already has an ``end_time`` and
                tracked until a later call otherwise.
        """
        span_id = span.span_id
        otel_span = self.spans.pop(span_id, None)
        if otel_span is not None:
            # Update the end time and attributes of the span
            self._set_metadata_attributes(otel_span, span.metadata)
            self._end_otel_span(otel_span, span)
            return

        parent_context = self._create_parent_context(span)
        # Datetime -> int (nanoseconds since the epoch)
        start_time = int(span.start_time.timestamp() * 1e9)

        otel_span = self.tracer.start_span(
            span.operation_name,
            context=parent_context,
            kind=SpanKind.INTERNAL,
            start_time=start_time,
        )

        otel_span.set_attribute("dbgpt_trace_id", span.trace_id)
        otel_span.set_attribute("dbgpt_span_id", span.span_id)
        if span.parent_span_id:
            otel_span.set_attribute("dbgpt_parent_span_id", span.parent_span_id)
        otel_span.set_attribute("span_type", span.span_type.value)
        self._set_metadata_attributes(otel_span, span.metadata)

        if span.end_time:
            # The span was already finished when first seen. End it now;
            # previously it was started but neither ended nor tracked.
            self._end_otel_span(otel_span, span)
        else:
            self.spans[span_id] = otel_span

    def append_span_batch(self, spans: List[Span]):
        """Append every span in ``spans`` via ``append_span``, in order."""
        for span in spans:
            self.append_span(span)

    @classmethod
    def _set_metadata_attributes(cls, otel_span, metadata: Optional[Dict]) -> None:
        """Copy supported metadata entries onto ``otel_span`` as attributes.

        Only scalar values (bool, str, bytes, int, float) and lists made solely
        of such values are copied; everything else is skipped.
        """
        if not metadata:
            return
        scalar_types = cls._ATTRIBUTE_SCALAR_TYPES
        for key, value in metadata.items():
            is_scalar = isinstance(value, scalar_types)
            is_scalar_list = isinstance(value, list) and all(
                isinstance(item, scalar_types) for item in value
            )
            if is_scalar or is_scalar_list:
                otel_span.set_attribute(key, value)

    @staticmethod
    def _end_otel_span(otel_span, span: Span) -> None:
        """End ``otel_span`` at ``span.end_time`` when set, else at the current time."""
        end_time = int(span.end_time.timestamp() * 1e9) if span.end_time else None
        if end_time:
            otel_span.end(end_time=end_time)
        else:
            otel_span.end()

    def _create_parent_context(self, span: Span):
        """Build the OpenTelemetry context used as the parent of ``span``.

        Returns a context holding ``trace.INVALID_SPAN`` when ``span`` has no
        parent or its ``parent_span_id`` cannot be split into a non-zero trace
        ID; otherwise a context holding a non-recording span whose
        ``SpanContext`` carries the parsed trace ID and parent span ID.
        """
        if not span.parent_span_id:
            return trace.set_span_in_context(trace.INVALID_SPAN)

        trace_id, parent_span_id = _split_span_id(span.parent_span_id)
        if not trace_id:
            return trace.set_span_in_context(trace.INVALID_SPAN)

        span_context = SpanContext(
            trace_id=trace_id,
            span_id=parent_span_id,
            is_remote=True,
            trace_flags=trace.TraceFlags(0x01),  # Default: SAMPLED
        )
        return trace.set_span_in_context(trace.NonRecordingSpan(span_context))

    def close(self):
        """Shut down the tracer provider."""
        self.tracer_provider.shutdown()

View File

@@ -249,7 +249,7 @@ def chat(
for sp in spans:
span_type = sp["span_type"]
metadata = sp.get("metadata")
if span_type == SpanType.RUN:
if span_type == SpanType.RUN and metadata and "run_service" in metadata:
service_name = metadata["run_service"]
service_spans[service_name] = sp.copy()
if set(service_spans.keys()) == service_names and found_trace_id:

View File

@@ -3,7 +3,7 @@ import inspect
import logging
from contextvars import ContextVar
from functools import wraps
from typing import Dict, Optional
from typing import Any, AsyncIterator, Dict, Optional
from dbgpt.component import ComponentType, SystemApp
from dbgpt.util.module_utils import import_from_checked_string
@@ -46,9 +46,12 @@ class DefaultTracer(Tracer):
metadata: Dict = None,
) -> Span:
trace_id = (
self._new_uuid() if parent_span_id is None else parent_span_id.split(":")[0]
self._new_random_trace_id()
if parent_span_id is None
else parent_span_id.split(":")[0]
)
span_id = f"{trace_id}:{self._new_uuid()}"
span_id = f"{trace_id}:{self._new_random_span_id()}"
span = Span(
trace_id,
span_id,
@@ -164,6 +167,33 @@ class TracerManager:
current_span = self.get_current_span()
return current_span.span_type if current_span else None
def _parse_span_id(self, body: Any) -> Optional[str]:
    """Extract a span ID from ``body`` by delegating to ``base._parse_span_id``.

    Args:
        body: The object to inspect; forwarded unchanged.

    Returns:
        Optional[str]: Whatever ``base._parse_span_id`` returns for ``body``.
    """
    # Imported inside the method, as in the original code
    from .base import _parse_span_id as _extract_span_id

    return _extract_span_id(body)
def wrapper_async_stream(
    self,
    generator: AsyncIterator[Any],
    operation_name: str,
    parent_span_id: str = None,
    span_type: SpanType = None,
    metadata: Dict = None,
) -> AsyncIterator[Any]:
    """Wrap an async generator so that consuming it runs inside a span.

    Args:
        generator: The async iterator whose items are passed through.
        operation_name: Operation name given to ``start_span``.
        parent_span_id: Parent span ID; when falsy, the result of
            ``get_current_span_id()`` at call time is used.
        span_type: Span type given to ``start_span``.
        metadata: Metadata given to ``start_span``.

    Returns:
        AsyncIterator[Any]: An async generator yielding the same items. The
            span is started when iteration begins and ended in a ``finally``
            block when iteration stops for any reason.
    """
    # Resolved here, at wrap time, rather than when iteration starts
    effective_parent = (
        parent_span_id if parent_span_id else self.get_current_span_id()
    )

    async def _traced_stream():
        active_span = self.start_span(
            operation_name, effective_parent, span_type, metadata
        )
        try:
            async for chunk in generator:
                yield chunk
        finally:
            active_span.end()

    return _traced_stream()
root_tracer: TracerManager = TracerManager()
@@ -206,10 +236,14 @@ def _parse_operation_name(func, *args):
def initialize_tracer(
tracer_filename: str,
root_operation_name: str = "DB-GPT-Web-Entry",
root_operation_name: str = "DB-GPT-Webserver",
system_app: Optional[SystemApp] = None,
tracer_storage_cls: Optional[str] = None,
create_system_app: bool = False,
enable_open_telemetry: bool = False,
otlp_endpoint: Optional[str] = None,
otlp_insecure: Optional[bool] = None,
otlp_timeout: Optional[int] = None,
):
"""Initialize the tracer with the given filename and system app."""
from dbgpt.util.tracer.span_storage import FileSpanStorage, SpanStorageContainer
@@ -227,6 +261,17 @@ def initialize_tracer(
storage_container = SpanStorageContainer(system_app)
storage_container.append_storage(FileSpanStorage(tracer_filename))
if enable_open_telemetry:
from dbgpt.util.tracer.opentelemetry import OpenTelemetrySpanStorage
storage_container.append_storage(
OpenTelemetrySpanStorage(
service_name=root_operation_name,
otlp_endpoint=otlp_endpoint,
otlp_insecure=otlp_insecure,
otlp_timeout=otlp_timeout,
)
)
if tracer_storage_cls:
logger.info(f"Begin parse storage class {tracer_storage_cls}")

View File

@@ -1,4 +1,4 @@
import uuid
import logging
from contextvars import ContextVar
from starlette.middleware.base import BaseHTTPMiddleware
@@ -7,7 +7,11 @@ from starlette.types import ASGIApp
from dbgpt.util.tracer import Tracer, TracerContext
_DEFAULT_EXCLUDE_PATHS = ["/api/controller/heartbeat"]
from .base import _parse_span_id
_DEFAULT_EXCLUDE_PATHS = ["/api/controller/heartbeat", "/api/health"]
logger = logging.getLogger(__name__)
class TraceIDMiddleware(BaseHTTPMiddleware):
@@ -33,11 +37,12 @@ class TraceIDMiddleware(BaseHTTPMiddleware):
):
return await call_next(request)
span_id = request.headers.get("DBGPT_TRACER_SPAN_ID")
# if not span_id:
# span_id = str(uuid.uuid4())
# self.trace_context_var.set(TracerContext(span_id=span_id))
# Read trace_id from request headers
span_id = _parse_span_id(request)
logger.debug(
f"TraceIDMiddleware: span_id={span_id}, path={request.url.path}, "
f"headers={request.headers}"
)
with self.tracer.start_span(
self.root_operation_name, span_id, metadata={"path": request.url.path}
):