Compare commits

...

2 Commits

Author               SHA1        Message                                    Date
William Fu-Hinthorn  4ecbb3aeac  Delete deprecated run evaluator loaders    2023-07-10 10:52:19 -07:00
William Fu-Hinthorn  4d50092103  Switch to langsmith                        2023-07-10 10:49:37 -07:00
17 changed files with 1295 additions and 1096 deletions

View File

@@ -4,7 +4,7 @@ from concurrent.futures import Future, ThreadPoolExecutor, wait
from typing import Any, Optional, Sequence, Set, Union
from uuid import UUID
from langchainplus_sdk import LangChainPlusClient, RunEvaluator
from langsmith import Client, RunEvaluator
from langchain.callbacks.manager import tracing_v2_enabled
from langchain.callbacks.tracers.base import BaseTracer
@@ -23,8 +23,8 @@ class EvaluatorCallbackHandler(BaseTracer):
max_workers : int, optional
The maximum number of worker threads to use for running the evaluators.
If not specified, it will default to the number of evaluators.
client : LangChainPlusClient, optional
The LangChainPlusClient instance to use for evaluating the runs.
client : LangSmith Client, optional
The LangSmith client instance to use for evaluating the runs.
If not specified, a new instance will be created.
example_id : Union[UUID, str], optional
The example ID to be associated with the runs.
@@ -35,8 +35,8 @@ class EvaluatorCallbackHandler(BaseTracer):
----------
example_id : Union[UUID, None]
The example ID associated with the runs.
client : LangChainPlusClient
The LangChainPlusClient instance used for evaluating the runs.
client : Client
The LangSmith client instance used for evaluating the runs.
evaluators : Sequence[RunEvaluator]
The sequence of run evaluators to be executed.
executor : ThreadPoolExecutor
@@ -56,7 +56,7 @@ class EvaluatorCallbackHandler(BaseTracer):
self,
evaluators: Sequence[RunEvaluator],
max_workers: Optional[int] = None,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
example_id: Optional[Union[UUID, str]] = None,
skip_unfinished: bool = True,
project_name: Optional[str] = None,
@@ -66,7 +66,7 @@ class EvaluatorCallbackHandler(BaseTracer):
self.example_id = (
UUID(example_id) if isinstance(example_id, str) else example_id
)
self.client = client or LangChainPlusClient()
self.client = client or Client()
self.evaluators = evaluators
self.executor = ThreadPoolExecutor(
max_workers=max(max_workers or len(evaluators), 1)
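
For illustration only, a minimal sketch of constructing the handler against the renamed client (the module path, the empty evaluator list, and the project name are assumptions, not part of this diff):

from langsmith import Client

# Assumed module path; this compare view does not show file names.
from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler

# After this change the handler accepts, and defaults to, a langsmith Client.
handler = EvaluatorCallbackHandler(
    evaluators=[],                   # placeholder: supply RunEvaluator instances
    client=Client(),                 # equivalent to omitting `client`
    project_name="my-eval-project",  # hypothetical project name
)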

View File

@@ -8,7 +8,7 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Union
from uuid import UUID
from langchainplus_sdk import LangChainPlusClient
from langsmith import Client
from langchain.callbacks.tracers.base import BaseTracer
from langchain.callbacks.tracers.schemas import Run, RunTypeEnum, TracerSession
@@ -44,7 +44,7 @@ class LangChainTracer(BaseTracer):
self,
example_id: Optional[Union[UUID, str]] = None,
project_name: Optional[str] = None,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
@@ -59,7 +59,7 @@ class LangChainTracer(BaseTracer):
)
# set max_workers to 1 to process tasks in order
self.executor = ThreadPoolExecutor(max_workers=1)
self.client = client or LangChainPlusClient()
self.client = client or Client()
self._futures: Set[Future] = set()
self.tags = tags or []
global _TRACERS
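
A hedged usage sketch under the new import (module path, project name, and tags are placeholders not shown in this diff):

from langsmith import Client

# Assumed module path; not visible in this compare view.
from langchain.callbacks.tracers.langchain import LangChainTracer

# The tracer now takes an optional langsmith Client and falls back to
# Client() when none is given, as in the hunk above.
tracer = LangChainTracer(
    project_name="my-project",     # hypothetical project
    client=Client(),
    tags=["langsmith-migration"],  # illustrative tag
)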

View File

@@ -5,8 +5,8 @@ import datetime
from typing import Any, Dict, List, Optional
from uuid import UUID
from langchainplus_sdk.schemas import RunBase as BaseRunV2
from langchainplus_sdk.schemas import RunTypeEnum
from langsmith.schemas import RunBase as BaseRunV2
from langsmith.schemas import RunTypeEnum
from pydantic import BaseModel, Field, root_validator
from langchain.schema import LLMResult

View File

@@ -18,8 +18,8 @@ from typing import (
Union,
)
from langchainplus_sdk import LangChainPlusClient, RunEvaluator
from langchainplus_sdk.schemas import Example
from langsmith import Client, RunEvaluator
from langsmith.schemas import Example
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.manager import Callbacks
@@ -306,7 +306,7 @@ async def _gather_with_concurrency(
async def _callbacks_initializer(
project_name: Optional[str],
client: LangChainPlusClient,
client: Client,
run_evaluators: Sequence[RunEvaluator],
evaluation_handler_collector: List[EvaluatorCallbackHandler],
) -> List[BaseTracer]:
@@ -348,7 +348,7 @@ async def arun_on_examples(
num_repetitions: int = 1,
project_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
tags: Optional[List[str]] = None,
run_evaluators: Optional[Sequence[RunEvaluator]] = None,
input_mapper: Optional[Callable[[Dict], Any]] = None,
@@ -369,7 +369,7 @@ async def arun_on_examples(
project_name: Project name to use when tracing runs.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to read the dataset. If not provided, a new
client: LangSmith client to use to read the dataset. If not provided, a new
client will be created using the credentials in the environment.
tags: Tags to add to each run in the project.
run_evaluators: Evaluators to run on the results of the chain.
@@ -383,7 +383,7 @@ async def arun_on_examples(
A dictionary mapping example ids to the model outputs.
"""
project_name = _get_project_name(project_name, llm_or_chain_factory, None)
client_ = client or LangChainPlusClient()
client_ = client or Client()
client_.create_project(project_name)
results: Dict[str, List[Any]] = {}
@@ -548,7 +548,7 @@ def run_on_examples(
num_repetitions: int = 1,
project_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
tags: Optional[List[str]] = None,
run_evaluators: Optional[Sequence[RunEvaluator]] = None,
input_mapper: Optional[Callable[[Dict], Any]] = None,
@@ -568,7 +568,7 @@ def run_on_examples(
project_name: Name of the project to store the traces in.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to access the dataset. If None, a new client
client: LangSmith client to use to access the dataset. If None, a new client
will be created using the credentials in the environment.
tags: Tags to add to each run in the project.
run_evaluators: Evaluators to run on the results of the chain.
@@ -583,7 +583,7 @@ def run_on_examples(
"""
results: Dict[str, Any] = {}
project_name = _get_project_name(project_name, llm_or_chain_factory, None)
client_ = client or LangChainPlusClient()
client_ = client or Client()
client_.create_project(project_name)
tracer = LangChainTracer(project_name=project_name)
evaluator_project_name = f"{project_name}-evaluators"
@@ -645,7 +645,7 @@ async def arun_on_dataset(
num_repetitions: int = 1,
project_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
tags: Optional[List[str]] = None,
run_evaluators: Optional[Sequence[RunEvaluator]] = None,
input_mapper: Optional[Callable[[Dict], Any]] = None,
@@ -666,8 +666,8 @@ async def arun_on_dataset(
project_name: Name of the project to store the traces in.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to read the dataset. If not provided,
a new client will be created using the credentials in the environment.
client: LangSmith client to use to read the dataset. If not provided, a new
client will be created using the credentials in the environment.
tags: Tags to add to each run in the project.
run_evaluators: Evaluators to run on the results of the chain.
input_mapper: A function to map to the inputs dictionary from an Example
@@ -678,7 +678,7 @@ async def arun_on_dataset(
Returns:
A dictionary containing the run's project name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
client_ = client or Client()
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))
@@ -707,7 +707,7 @@ def run_on_dataset(
num_repetitions: int = 1,
project_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
client: Optional[Client] = None,
tags: Optional[List[str]] = None,
run_evaluators: Optional[Sequence[RunEvaluator]] = None,
input_mapper: Optional[Callable[[Dict], Any]] = None,
@@ -727,8 +727,8 @@ def run_on_dataset(
project_name: Name of the project to store the traces in.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to access the dataset. If None,
a new client will be created using the credentials in the environment.
client: LangSmith client to use to access the dataset. If None, a new client
will be created using the credentials in the environment.
tags: Tags to add to each run in the project.
run_evaluators: Evaluators to run on the results of the chain.
input_mapper: A function to map to the inputs dictionary from an Example
@@ -740,7 +740,7 @@ def run_on_dataset(
Returns:
A dictionary containing the run's project name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
client_ = client or Client()
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))
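
As a hedged sketch of calling the updated entry point (the dataset name and the fake model are placeholders; only the change of `client` to a langsmith Client comes from this diff):

from langsmith import Client

from langchain.client.runner_utils import run_on_dataset
from langchain.llms.fake import FakeListLLM  # stand-in model for the sketch

# Hypothetical call after the rename: `client` is now a langsmith.Client;
# the dataset is assumed to already exist in LangSmith.
client = Client()  # credentials are read from the environment
results = run_on_dataset(
    "my-dataset",
    FakeListLLM(responses=["ok"]),
    client=client,
    project_name="my-eval-run",
)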

View File

@@ -4,13 +4,6 @@ from langchain.evaluation.run_evaluators.base import (
RunEvaluatorInputMapper,
RunEvaluatorOutputParser,
)
from langchain.evaluation.run_evaluators.implementations import (
ChoicesOutputParser,
StringRunEvaluatorInputMapper,
get_criteria_evaluator,
get_qa_evaluator,
get_trajectory_evaluator,
)
from langchain.evaluation.run_evaluators.loading import (
load_run_evaluator_for_model,
load_run_evaluators_for_model,

View File

@@ -3,8 +3,8 @@ from __future__ import annotations
from abc import abstractmethod
from typing import Any, Dict, List, Optional
from langchainplus_sdk import EvaluationResult, RunEvaluator
from langchainplus_sdk.schemas import Example, Run
from langsmith import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run
from langchain.callbacks.manager import (
AsyncCallbackManagerForChainRun,
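
For context, a minimal sketch of a custom evaluator written directly against the renamed langsmith interfaces (the class name and feedback key are illustrative, not part of this change):

from typing import Optional

from langsmith import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run

class ExactMatchEvaluator(RunEvaluator):
    """Toy evaluator: scores 1 when the run output matches the reference."""

    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> EvaluationResult:
        prediction = (run.outputs or {}).get("output")
        reference = (example.outputs or {}).get("output") if example else None
        return EvaluationResult(key="exact_match", score=int(prediction == reference))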

View File

@@ -1,306 +0,0 @@
from typing import Any, Dict, Mapping, Optional, Sequence, Union
from langchainplus_sdk.evaluation import EvaluationResult
from langchainplus_sdk.schemas import Example, Run, RunTypeEnum
from pydantic import BaseModel, Field
from langchain.chat_models.base import BaseChatModel
from langchain.evaluation.agents.trajectory_eval_chain import (
TrajectoryEvalChain,
TrajectoryOutputParser,
)
from langchain.evaluation.criteria.eval_chain import (
CriteriaEvalChain,
CriteriaResultOutputParser,
)
from langchain.evaluation.qa.eval_chain import QAEvalChain
from langchain.evaluation.qa.eval_prompt import PROMPT as QA_DEFAULT_PROMPT
from langchain.evaluation.qa.eval_prompt import SQL_PROMPT
from langchain.evaluation.run_evaluators.base import (
RunEvaluatorChain,
RunEvaluatorInputMapper,
RunEvaluatorOutputParser,
)
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel
from langchain.tools.base import BaseTool
_QA_PROMPTS = {
"qa": QA_DEFAULT_PROMPT,
"sql": SQL_PROMPT,
}
class StringRunEvaluatorInputMapper(RunEvaluatorInputMapper, BaseModel):
"""Maps the Run and Optional[Example] to a dictionary."""
prediction_map: Dict[str, str]
"""Map from run outputs to the evaluation inputs."""
input_map: Dict[str, str]
"""Map from run inputs to the evaluation inputs."""
answer_map: Optional[Dict[str, str]] = None
"""Map from example outputs to the evaluation inputs."""
def map(self, run: Run, example: Optional[Example] = None) -> Dict[str, Any]:
"""Maps the Run and Optional[Example] to a dictionary"""
if run.outputs is None and self.prediction_map:
raise ValueError(f"Run {run.id} has no outputs.")
if self.answer_map and (not example or not example.outputs):
raise ValueError("This evaluator requires references, but none were given.")
outputs = run.outputs or {}
data = {value: outputs[key] for key, value in self.prediction_map.items()}
data.update({value: run.inputs[key] for key, value in self.input_map.items()})
if self.answer_map and example and example.outputs:
data.update(
{value: example.outputs[key] for key, value in self.answer_map.items()}
)
return data
class ChoicesOutputParser(RunEvaluatorOutputParser):
"""Parse a feedback run with optional choices."""
evaluation_name: str
choices_map: Optional[Dict[str, int]] = None
@property
def _type(self) -> str:
return "choices_run_eval"
def parse(self, text: str) -> EvaluationResult:
"""Parse the last line of the text and return an evaluation result."""
lines = text.strip().split()
value = lines[-1].strip()
score = self.choices_map.get(value) if self.choices_map else None
comment = " ".join(lines[:-1]) if len(lines) > 1 else None
return EvaluationResult(
key=self.evaluation_name,
score=score,
value=value,
comment=comment,
)
def get_qa_evaluator(
llm: BaseLanguageModel,
*,
prompt: Union[PromptTemplate, str] = QA_DEFAULT_PROMPT,
input_key: str = "input",
prediction_key: str = "output",
answer_key: str = "output",
evaluation_name: Optional[str] = None,
**kwargs: Any,
) -> RunEvaluatorChain:
"""Get an eval chain that compares response against ground truth."""
if isinstance(prompt, str):
prompt = _QA_PROMPTS[prompt]
eval_chain = QAEvalChain.from_llm(llm=llm, prompt=prompt, **kwargs)
input_mapper = kwargs.pop(
"input_mapper",
StringRunEvaluatorInputMapper(
input_map={input_key: "query"},
prediction_map={prediction_key: "result"},
answer_map={answer_key: "answer"},
),
)
evaluation_name = evaluation_name or "Correctness"
output_parser = kwargs.pop(
"output_parser",
ChoicesOutputParser(
evaluation_name=evaluation_name,
choices_map={"CORRECT": 1, "INCORRECT": 0},
),
)
tags = kwargs.pop("tags", [])
return RunEvaluatorChain(
eval_chain=eval_chain,
input_mapper=input_mapper,
output_parser=output_parser,
tags=tags + [evaluation_name],
**kwargs,
)
class CriteriaOutputParser(RunEvaluatorOutputParser):
"""Parse a criteria results into an evaluation result."""
evaluation_name: str
@property
def _type(self) -> str:
return "criteria"
def parse(self, parsed_output: Union[str, dict]) -> EvaluationResult:
"""Parse the last line of the text and return an evaluation result."""
if isinstance(parsed_output, str):
parsed_output_ = CriteriaResultOutputParser().parse(parsed_output)
else:
parsed_output_ = parsed_output
return EvaluationResult(
key=self.evaluation_name,
score=parsed_output_["score"],
value=parsed_output_["value"],
comment=parsed_output_["reasoning"],
)
def get_criteria_evaluator(
llm: BaseLanguageModel,
criteria: Union[Mapping[str, str], Sequence[str], str],
*,
input_key: str = "input",
prediction_key: str = "output",
prompt: Optional[BasePromptTemplate] = None,
evaluation_name: Optional[str] = None,
requires_reference: bool = False,
**kwargs: Any,
) -> RunEvaluatorChain:
"""Get an eval chain for grading a model's response against a map of criteria."""
input_mapper = kwargs.pop(
"input_mapper",
StringRunEvaluatorInputMapper(
input_map={input_key: "input"},
prediction_map={prediction_key: "output"},
),
)
criteria_ = CriteriaEvalChain.resolve_criteria(criteria)
evaluation_name = evaluation_name or " ".join(criteria_.keys())
parser = kwargs.pop(
"output_parser",
CriteriaOutputParser(
choices_map={"Y": 1, "N": 0}, evaluation_name=evaluation_name
),
)
tags = kwargs.pop("tags", [])
eval_chain = CriteriaEvalChain.from_llm(
llm=llm,
criteria=criteria_,
prompt=prompt,
requires_reference=requires_reference,
**kwargs,
)
return RunEvaluatorChain(
eval_chain=eval_chain,
input_mapper=input_mapper,
output_parser=parser,
tags=tags + [evaluation_name],
**kwargs,
)
class TrajectoryRunEvalOutputParser(RunEvaluatorOutputParser, TrajectoryOutputParser):
evaluation_name: str = "Agent Trajectory"
"""The name assigned to the evaluation feedback."""
evaluator_info: dict = Field(default_factory=dict)
"""Additional information to log as feedback metadata."""
@property
def _type(self) -> str:
return "agent_trajectory_run_eval"
def parse_chain_output(self, output: Dict[str, Any]) -> EvaluationResult:
"""Parse the output of a run."""
return EvaluationResult(
key=self.evaluation_name,
score=int(output["score"]),
comment=output["reasoning"],
evaluator_info=self.evaluator_info,
)
class TrajectoryInputMapper(RunEvaluatorInputMapper, BaseModel):
"""Maps the Run and Optional[Example] to a dictionary."""
agent_input_key: str = "input"
"""The key to load from the agent executor's run input dictionary."""
agent_output_key: str = "output"
"""The key to load from the agent executor's run output dictionary."""
tool_input_key: str = "input"
"""The key to load from the tool executor's run input dictionary."""
tool_output_key: str = "output"
"""The key to load from the tool executor's run output dictionary."""
reference_output_key: Optional[str] = None
"""The key to use for selecting the reference answer."""
def map(self, run: Run, example: Optional[Example] = None) -> Dict[str, str]:
"""Maps the Run and Optional[Example] to a dictionary"""
if run.child_runs is None:
raise ValueError("Run must have child runs to be evaluated.")
if run.outputs is None:
raise ValueError("Run must have outputs to be evaluated.")
reference = ""
if example is not None and example.outputs:
if self.reference_output_key is not None:
reference = example.outputs[self.reference_output_key]
elif "output" in example.outputs:
reference = example.outputs["output"]
elif len(example.outputs) == 1:
reference = next(iter(example.outputs.values()))
else:
raise ValueError("Could not infer the reference answer from ")
question = run.inputs[self.agent_input_key]
tool_runs = [
run_ for run_ in run.child_runs if run_.run_type == RunTypeEnum.tool
]
agent_steps = []
for i, run_ in enumerate(tool_runs, 1):
tool_output = (
f"Tool output: {run_.outputs.get(self.tool_output_key, run_.outputs)}"
if run_.outputs
else (f"Tool error: {run_.error}" if run_.error else "No output")
)
agent_steps.append(
f"""Step {i}:
Tool used: {run_.name}
Tool input: {run_.inputs.get(self.tool_input_key, run_.inputs)}
Tool output: {tool_output}"""
)
return {
"question": question,
"agent_trajectory": "\n\n".join(agent_steps),
"answer": run.outputs[self.agent_output_key],
"reference": reference,
}
def get_trajectory_evaluator(
llm: BaseChatModel,
agent_tools: Sequence[BaseTool],
*,
input_key: str = "input",
prediction_key: str = "output",
tool_input_key: str = "input",
tool_output_key: str = "output",
reference_output_key: Optional[str] = None,
evaluation_name: str = "Agent Trajectory",
**kwargs: Any,
) -> RunEvaluatorChain:
"""Get an eval chain for grading a model's response against a map of criteria."""
input_mapper = kwargs.pop(
"input_mapper",
TrajectoryInputMapper(
agent_input_key=input_key,
agent_output_key=prediction_key,
tool_input_key=tool_input_key,
tool_output_key=tool_output_key,
reference_output_key=reference_output_key,
),
)
parser = kwargs.pop(
"output_parser",
TrajectoryRunEvalOutputParser(evaluation_name=evaluation_name),
)
eval_chain = TrajectoryEvalChain.from_llm(
llm=llm, agent_tools=agent_tools, return_reasoning=True, **kwargs
)
tags = kwargs.pop("tags", [])
return RunEvaluatorChain(
eval_chain=eval_chain,
input_mapper=input_mapper,
output_parser=parser,
tags=tags + [evaluation_name],
**kwargs,
)

View File

@@ -1,7 +1,7 @@
""""Loading helpers for run evaluators."""
from typing import Any, List, Optional, Sequence, Union
from langchainplus_sdk import RunEvaluator
from langsmith import RunEvaluator
from langchain.base_language import BaseLanguageModel
from langchain.chains.base import Chain

View File

@@ -4,8 +4,8 @@ from __future__ import annotations
from abc import abstractmethod
from typing import Any, Dict, List, Optional, Union
from langchainplus_sdk import EvaluationResult, RunEvaluator
from langchainplus_sdk.schemas import Example, Run
from langsmith import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import (

File diff suppressed because it is too large.

View File

@@ -2,7 +2,7 @@
import subprocess
from pathlib import Path
from langchainplus_sdk.cli.main import get_docker_compose_command
from langsmith.cli.main import get_docker_compose_command
def main() -> None:

poetry.lock (generated): 588 lines changed

File diff suppressed because it is too large.

View File

@@ -108,7 +108,6 @@ pyspark = {version = "^3.4.0", optional = true}
clarifai = {version = ">=9.1.0", optional = true}
tigrisdb = {version = "^1.0.0b6", optional = true}
nebula3-python = {version = "^3.4.0", optional = true}
langchainplus-sdk = "^0.0.20"
awadb = {version = "^0.3.3", optional = true}
azure-search-documents = {version = "11.4.0a20230509004", source = "azure-sdk-dev", optional = true}
esprima = {version = "^4.0.1", optional = true}
@@ -118,6 +117,7 @@ psychicapi = {version = "^0.8.0", optional = true}
cassio = {version = "^0.0.7", optional = true}
rdflib = {version = "^6.3.2", optional = true}
rapidfuzz = {version = "^3.1.1", optional = true}
langsmith = "^0.0.2"
[tool.poetry.group.docs.dependencies]
autodoc_pydantic = "^1.8.0"
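
For downstream code that has to run on either side of this dependency swap, a purely illustrative import shim (not part of this change):

# Hypothetical compatibility shim during the langchainplus-sdk -> langsmith transition.
try:
    from langsmith import Client
except ImportError:  # older environments that still ship langchainplus-sdk
    from langchainplus_sdk import LangChainPlusClient as Client

client = Client()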

View File

@@ -3,7 +3,7 @@ from typing import Iterator
from uuid import uuid4
import pytest
from langchainplus_sdk import LangChainPlusClient as Client
from langsmith import Client as Client
from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI

View File

@@ -5,8 +5,8 @@ from typing import Any, Dict, List, Optional, Union
from unittest import mock
import pytest
from langchainplus_sdk.client import LangChainPlusClient
from langchainplus_sdk.schemas import Dataset, Example
from langsmith.client import Client
from langsmith.schemas import Dataset, Example
from langchain.chains.base import Chain
from langchain.chains.transform import TransformChain
@@ -235,15 +235,13 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
pass
with mock.patch.object(
LangChainPlusClient, "read_dataset", new=mock_read_dataset
), mock.patch.object(
LangChainPlusClient, "list_examples", new=mock_list_examples
), mock.patch(
Client, "read_dataset", new=mock_read_dataset
), mock.patch.object(Client, "list_examples", new=mock_list_examples), mock.patch(
"langchain.client.runner_utils._arun_llm_or_chain", new=mock_arun_chain
), mock.patch.object(
LangChainPlusClient, "create_project", new=mock_create_project
Client, "create_project", new=mock_create_project
):
client = LangChainPlusClient(api_url="http://localhost:1984", api_key="123")
client = Client(api_url="http://localhost:1984", api_key="123")
chain = mock.MagicMock()
num_repetitions = 3
results = await arun_on_dataset(

View File

@@ -3,7 +3,7 @@
from uuid import UUID
import pytest
from langchainplus_sdk.schemas import Example, Run
from langsmith.schemas import Example, Run
from langchain.evaluation.run_evaluators import get_criteria_evaluator, get_qa_evaluator
from tests.unit_tests.llms.fake_llm import FakeLLM

View File

@@ -38,7 +38,7 @@ def test_required_dependencies(poetry_conf: Mapping[str, Any]) -> None:
"aiohttp",
"async-timeout",
"dataclasses-json",
"langchainplus-sdk",
"langsmith",
"numexpr",
"numpy",
"openapi-schema-pydantic",