Send evaluator logs to new session (#7206)
Also stop specifying "eval" mode since explicit project modes are deprecated
parent 0dc700eebf
commit 75aa408f10
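In practice, evaluator feedback runs are now traced to a sibling "<project>-evaluators" project instead of a project created with the deprecated `mode="eval"`. A minimal usage sketch (the import path, client setup, and project name below are assumptions for illustration, not part of this diff):

```python
# Sketch of the new routing: eval chain runs go to a sibling evaluator project.
from langchainplus_sdk import LangChainPlusClient

from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler  # module path assumed

client = LangChainPlusClient()
project_name = "my-test-project"  # hypothetical project for the chain under test

# The chain's own runs stay in `project_name`; the eval chains run by the
# evaluators are traced to "<project_name>-evaluators" instead.
handler = EvaluatorCallbackHandler(
    evaluators=[],  # RunEvaluator instances would go here
    client=client,
    project_name=f"{project_name}-evaluators",
)
```

As the diff below shows, the renamed `_evaluate_in_project` calls `client.evaluate_run` inside `tracing_v2_enabled(project_name=...)`, so any chains an evaluator spins up are traced to that evaluator project.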
@@ -6,6 +6,7 @@ from uuid import UUID
 
 from langchainplus_sdk import LangChainPlusClient, RunEvaluator
 
+from langchain.callbacks.manager import tracing_v2_enabled
 from langchain.callbacks.tracers.base import BaseTracer
 from langchain.callbacks.tracers.schemas import Run
 
@@ -27,6 +28,8 @@ class EvaluatorCallbackHandler(BaseTracer):
         If not specified, a new instance will be created.
     example_id : Union[UUID, str], optional
         The example ID to be associated with the runs.
+    project_name : str, optional
+        The LangSmith project name to organize eval chain runs under.
 
     Attributes
     ----------
@@ -40,6 +43,8 @@ class EvaluatorCallbackHandler(BaseTracer):
         The thread pool executor used for running the evaluators.
     futures : Set[Future]
         The set of futures representing the running evaluators.
+    project_name : Optional[str]
+        The LangSmith project name to organize eval chain runs under.
     """
 
     name = "evaluator_callback_handler"
@@ -50,6 +55,7 @@ class EvaluatorCallbackHandler(BaseTracer):
         max_workers: Optional[int] = None,
         client: Optional[LangChainPlusClient] = None,
         example_id: Optional[Union[UUID, str]] = None,
+        project_name: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
@@ -62,10 +68,24 @@ class EvaluatorCallbackHandler(BaseTracer):
             max_workers=max(max_workers or len(evaluators), 1)
         )
         self.futures: Set[Future] = set()
+        self.project_name = project_name
 
-    def _evaluate_run(self, run: Run, evaluator: RunEvaluator) -> None:
+    def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
+        """Evaluate the run in the project.
+
+        Parameters
+        ----------
+        run : Run
+            The run to be evaluated.
+        evaluator : RunEvaluator
+            The evaluator to use for evaluating the run.
+
+        """
         try:
-            self.client.evaluate_run(run, evaluator)
+            if self.project_name is None:
+                self.client.evaluate_run(run, evaluator)
+            with tracing_v2_enabled(project_name=self.project_name):
+                self.client.evaluate_run(run, evaluator)
         except Exception as e:
             logger.error(
                 f"Error evaluating run {run.id} with "
@@ -86,7 +106,9 @@ class EvaluatorCallbackHandler(BaseTracer):
         run_ = run.copy()
         run_.reference_example_id = self.example_id
         for evaluator in self.evaluators:
-            self.futures.add(self.executor.submit(self._evaluate_run, run_, evaluator))
+            self.futures.add(
+                self.executor.submit(self._evaluate_in_project, run_, evaluator)
+            )
 
     def wait_for_futures(self) -> None:
         """Wait for all futures to complete."""
@@ -313,28 +313,35 @@ async def _callbacks_initializer(
     project_name: Optional[str],
     client: LangChainPlusClient,
     run_evaluators: Sequence[RunEvaluator],
+    evaluation_handler_collector: List[EvaluatorCallbackHandler],
 ) -> List[BaseTracer]:
     """
     Initialize a tracer to share across tasks.
 
     Args:
         project_name: The project name for the tracer.
+        client: The client to use for the tracer.
+        run_evaluators: The evaluators to run.
+        evaluation_handler_collector: A list to collect the evaluators.
+            Used to wait for the evaluators to finish.
 
     Returns:
-        A LangChainTracer instance with an active project.
+        The callbacks for this thread.
     """
     callbacks: List[BaseTracer] = []
     if project_name:
         callbacks.append(LangChainTracer(project_name=project_name))
+    evaluator_project_name = f"{project_name}-evaluators" if project_name else None
     if run_evaluators:
-        callbacks.append(
-            EvaluatorCallbackHandler(
-                client=client,
-                evaluators=run_evaluators,
-                # We already have concurrency, don't want to overload the machine
-                max_workers=1,
-            )
-        )
+        callback = EvaluatorCallbackHandler(
+            client=client,
+            evaluators=run_evaluators,
+            # We already have concurrency, don't want to overload the machine
+            max_workers=1,
+            project_name=evaluator_project_name,
+        )
+        callbacks.append(callback)
+        evaluation_handler_collector.append(callback)
     return callbacks
 
 
@@ -382,12 +389,9 @@ async def arun_on_examples(
     """
     project_name = _get_project_name(project_name, llm_or_chain_factory, None)
     client_ = client or LangChainPlusClient()
-    client_.create_project(project_name, mode="eval")
+    client_.create_project(project_name)
 
     results: Dict[str, List[Any]] = {}
-    evaluation_handler = EvaluatorCallbackHandler(
-        evaluators=run_evaluators or [], client=client_
-    )
 
     async def process_example(
         example: Example, callbacks: List[BaseCallbackHandler], job_state: dict
@@ -410,17 +414,20 @@ async def arun_on_examples(
                 flush=True,
             )
 
+    evaluation_handlers: List[EvaluatorCallbackHandler] = []
     await _gather_with_concurrency(
         concurrency_level,
         functools.partial(
             _callbacks_initializer,
             project_name=project_name,
             client=client_,
+            evaluation_handler_collector=evaluation_handlers,
             run_evaluators=run_evaluators or [],
         ),
         *(functools.partial(process_example, e) for e in examples),
     )
-    evaluation_handler.wait_for_futures()
+    for handler in evaluation_handlers:
+        handler.wait_for_futures()
     return results
 
 
@@ -581,10 +588,13 @@ def run_on_examples(
     results: Dict[str, Any] = {}
     project_name = _get_project_name(project_name, llm_or_chain_factory, None)
     client_ = client or LangChainPlusClient()
-    client_.create_project(project_name, mode="eval")
+    client_.create_project(project_name)
     tracer = LangChainTracer(project_name=project_name)
+    evaluator_project_name = f"{project_name}-evaluators"
     evalution_handler = EvaluatorCallbackHandler(
-        evaluators=run_evaluators or [], client=client_
+        evaluators=run_evaluators or [],
+        client=client_,
+        project_name=evaluator_project_name,
     )
     callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler]
     for i, example in enumerate(examples):
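The runner-side change follows a collect-then-wait pattern: each worker builds its own `EvaluatorCallbackHandler`, the handlers are appended to a shared list via `evaluation_handler_collector`, and the caller waits on all of their futures once the examples finish. A rough, self-contained sketch of that pattern follows; the helper function, module path, and project name are illustrative, not code from this commit.

```python
from typing import List

from langchainplus_sdk import LangChainPlusClient, RunEvaluator

from langchain.callbacks.tracers.base import BaseTracer
from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler  # module path assumed

client = LangChainPlusClient()
project_name = "my-test-project"  # hypothetical
run_evaluators: List[RunEvaluator] = []  # real evaluators would go here

# Shared collector, playing the role of `evaluation_handler_collector` above.
evaluation_handlers: List[EvaluatorCallbackHandler] = []


def callbacks_for_worker() -> List[BaseTracer]:
    """Illustrative stand-in for _callbacks_initializer: build one worker's callbacks."""
    handler = EvaluatorCallbackHandler(
        client=client,
        evaluators=run_evaluators,
        max_workers=1,  # the worker pool already provides concurrency
        project_name=f"{project_name}-evaluators",
    )
    evaluation_handlers.append(handler)
    return [handler]


# ... each worker would process its examples with callbacks_for_worker() ...

# Once all examples have run, block until every queued evaluator future finishes.
for handler in evaluation_handlers:
    handler.wait_for_futures()
```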