Compare commits

...

1 Commit

Author SHA1 Message Date
vowelparrot
cddfe05073 Send evaluator logs to new session 2023-06-28 15:45:45 -07:00
2 changed files with 39 additions and 13 deletions

View File

@@ -5,6 +5,7 @@ from uuid import UUID
from langchainplus_sdk import LangChainPlusClient, RunEvaluator
from langchain.callbacks.manager import tracing_v2_enabled
from langchain.callbacks.tracers.base import BaseTracer
from langchain.callbacks.tracers.schemas import Run
@@ -47,6 +48,7 @@ class EvaluatorCallbackHandler(BaseTracer):
max_workers: Optional[int] = None,
client: Optional[LangChainPlusClient] = None,
example_id: Optional[Union[UUID, str]] = None,
project_name: Optional[str] = None,
**kwargs: Any
) -> None:
super().__init__(**kwargs)
@@ -59,6 +61,23 @@ class EvaluatorCallbackHandler(BaseTracer):
max_workers=max(max_workers or len(evaluators), 1)
)
self.futures: Set[Future] = set()
self.project_name = project_name
def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> Any:
    """Evaluate the run, optionally inside a project-scoped tracing context.

    When ``self.project_name`` is set, the evaluation call is executed inside
    ``tracing_v2_enabled(project_name=self.project_name)``; otherwise the
    client is called directly.

    Parameters
    ----------
    run : Run
        The run to be evaluated.
    evaluator : RunEvaluator
        The evaluator to use for evaluating the run.

    Returns
    -------
    Any
        Whatever ``self.client.evaluate_run(run, evaluator)`` returns. The
        value is passed through unchanged.
    """
    if self.project_name is None:
        # No dedicated project configured: call the client directly.
        return self.client.evaluate_run(run, evaluator)
    # Run the evaluation inside the tracing context for the configured project.
    with tracing_v2_enabled(project_name=self.project_name):
        return self.client.evaluate_run(run, evaluator)
def _persist_run(self, run: Run) -> None:
"""Run the evaluator on the run.
@@ -73,7 +92,7 @@ class EvaluatorCallbackHandler(BaseTracer):
run_.reference_example_id = self.example_id
for evaluator in self.evaluators:
self.futures.add(
self.executor.submit(self.client.evaluate_run, run_, evaluator)
self.executor.submit(self._evaluate_in_project, run_, evaluator)
)
def wait_for_futures(self) -> None:

View File

@@ -296,12 +296,14 @@ async def _callbacks_initializer(
project_name: Optional[str],
client: LangChainPlusClient,
run_evaluators: Sequence[RunEvaluator],
evaluation_handler_collector: List[EvaluatorCallbackHandler],
) -> List[BaseTracer]:
"""
Initialize a tracer to share across tasks.
Args:
project_name: The project name for the tracer.
client: The client to use for the tracer.
Returns:
A LangChainTracer instance with an active project.
@@ -309,15 +311,17 @@ async def _callbacks_initializer(
callbacks: List[BaseTracer] = []
if project_name:
callbacks.append(LangChainTracer(project_name=project_name))
evaluator_project_name = f"{project_name}-evaluators" if project_name else None
if run_evaluators:
callbacks.append(
EvaluatorCallbackHandler(
client=client,
evaluators=run_evaluators,
# We already have concurrency, don't want to overload the machine
max_workers=1,
)
callback = EvaluatorCallbackHandler(
client=client,
evaluators=run_evaluators,
# We already have concurrency, don't want to overload the machine
max_workers=1,
project_name=evaluator_project_name,
)
callbacks.append(callback)
evaluation_handler_collector.append(callback)
return callbacks
@@ -362,9 +366,6 @@ async def arun_on_examples(
client_.create_project(project_name, mode="eval")
results: Dict[str, List[Any]] = {}
evaluation_handler = EvaluatorCallbackHandler(
evaluators=run_evaluators or [], client=client_
)
async def process_example(
example: Example, callbacks: List[BaseCallbackHandler], job_state: dict
@@ -386,17 +387,20 @@ async def arun_on_examples(
flush=True,
)
evaluation_handlers: List[EvaluatorCallbackHandler] = []
await _gather_with_concurrency(
concurrency_level,
functools.partial(
_callbacks_initializer,
project_name=project_name,
client=client_,
evaluation_handler_collector=evaluation_handlers,
run_evaluators=run_evaluators or [],
),
*(functools.partial(process_example, e) for e in examples),
)
evaluation_handler.wait_for_futures()
for handler in evaluation_handlers:
handler.wait_for_futures()
return results
@@ -537,8 +541,11 @@ def run_on_examples(
client_ = client or LangChainPlusClient()
client_.create_project(project_name, mode="eval")
tracer = LangChainTracer(project_name=project_name)
evaluator_project_name = f"{project_name}-evaluators"
evalution_handler = EvaluatorCallbackHandler(
evaluators=run_evaluators or [], client=client_
evaluators=run_evaluators or [],
client=client_,
project_name=evaluator_project_name,
)
callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler]
for i, example in enumerate(examples):