Compare commits

...

10 Commits

Author SHA1 Message Date
Bagatur
e94bd97b37 fmt 2024-08-15 10:02:18 -07:00
Bagatur
9557555f1f fmt 2024-08-15 09:35:54 -07:00
Bagatur
3b80e9d59c fmt 2024-08-14 20:26:15 -07:00
Bagatur
a6d65ac891 fmt 2024-08-14 20:19:21 -07:00
Bagatur
22e57eb5de fmt 2024-08-14 18:42:12 -07:00
Bagatur
af2c129f8a fmt 2024-08-14 16:15:57 -07:00
Bagatur
0c4b09afe3 Merge branch 'master' into bagatutr/langsmith_example_selector 2024-08-14 16:00:14 -07:00
Bagatur
b2e7f82460 fmt 2024-08-14 15:32:01 -07:00
Bagatur
97ff183483 fmt 2024-08-14 14:41:29 -07:00
Bagatur
1b3c16268b wip 2024-08-14 12:15:54 -07:00
5 changed files with 363 additions and 12 deletions

View File

@@ -3,12 +3,12 @@
.. currentmodule:: {{ module }}
.. autoclass:: {{ objname }}
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
.. autoclass:: {{ objname }}
{% block attributes %}
{% if attributes %}
.. rubric:: {{ _('Attributes') }}

View File

@@ -4,6 +4,7 @@ This allows us to select examples that are most relevant to the input.
"""
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.example_selectors.langsmith import LangSmithExampleSelector
from langchain_core.example_selectors.length_based import (
LengthBasedExampleSelector,
)
@@ -19,4 +20,5 @@ __all__ = [
"MaxMarginalRelevanceExampleSelector",
"SemanticSimilarityExampleSelector",
"sorted_values",
"LangSmithExampleSelector",
]

View File

@@ -10,19 +10,21 @@ class BaseExampleSelector(ABC):
"""Interface for selecting examples to include in prompts."""
@abstractmethod
def add_example(self, example: Dict[str, str]) -> Any:
def add_example(self, example: Dict[str, Any]) -> Any:
"""Add new example to store.
Args:
example: A dictionary with keys as input variables
and values as their values."""
example: A dict that maps input and output variables to their example
values.
"""
async def aadd_example(self, example: Dict[str, str]) -> Any:
async def aadd_example(self, example: Dict[str, Any]) -> Any:
"""Async add new example to store.
Args:
example: A dictionary with keys as input variables
and values as their values."""
example: A dict that maps input and output variables to their example
values.
"""
return await run_in_executor(None, self.add_example, example)
@@ -31,14 +33,14 @@ class BaseExampleSelector(ABC):
"""Select which examples to use based on the inputs.
Args:
input_variables: A dictionary with keys as input variables
and values as their values."""
input_variables: A dict that maps input variables to their values.
"""
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Async select which examples to use based on the inputs.
Args:
input_variables: A dictionary with keys as input variables
and values as their values."""
input_variables: A dict that maps input variables to their values.
"""
return await run_in_executor(None, self.select_examples, input_variables)

View File

@@ -0,0 +1,346 @@
"""Select examples using a LangSmith few-shot index."""
import json
from typing import Any, Dict, List, Optional, Type, Union
from langsmith import Client
from langsmith import schemas as ls_schemas
from langsmith import utils as ls_utils
from langchain_core._api import beta
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.utils.function_calling import convert_to_openai_function
@beta()
class LangSmithExampleSelector(
    BaseExampleSelector, Runnable[Dict[str, Any], Dict[str, Any]]
):
    """Select examples using a LangSmith few-shot index.

    Head to the LangSmith docs for more on how dataset indexing works:
    `LangSmith docs <https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection>`_.

    Note, initializing the ``LangSmithExampleSelector`` does **not** create a dataset.
    This must be done explicitly, either outside the example selector or using
    the ``LangSmithExampleSelector(...).create_dataset(...)`` method.

    Args:
        k: How many examples to return on invocation.
        dataset_name: The name of the dataset of examples.
        dataset_id: The ID of the dataset of examples. Must specify one of
            dataset_name or dataset_id. If both are specified they must correspond
            to the same dataset.
        client: ``langsmith.Client``. If None, then ``client_kwargs`` will be used
            to initialize a new ``langsmith.Client``.
        client_kwargs: If ``client`` isn't specified these keyword args will be
            used to initialize a new ``langsmith.Client``.

    .. dropdown:: Index creation

        .. code-block:: python

            from langchain_core.example_selectors import LangSmithExampleSelector

            examples = [
                {"inputs": {"question": "..."}, "outputs": {"answer": "..."}},
                ...
            ]

            example_selector = LangSmithExampleSelector(
                k=4,
                dataset_name="foo_bar_task_few_shot_examples",
            )

            # Create the dataset.
            example_selector.create_dataset(...)

            # Populate the dataset.
            example_selector.add_examples(examples)

    .. dropdown:: Retrieving few shot examples

        .. code-block:: python

            from langchain_core.example_selectors import LangSmithExampleSelector
            from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
            from langchain_core.prompts import ChatPromptTemplate

            from langchain.chat_models import init_chat_model

            example_selector = LangSmithExampleSelector(
                k=4,
                dataset_name="foo_bar_task_few_shot_examples",
            )

            instructions = "..."

            def construct_prompt(input_: dict) -> list:
                examples = []
                for ex in input_["examples"]:
                    examples.extend([
                        HumanMessage(ex["inputs"]["question"], name="example_user"),
                        AIMessage(ex["outputs"]["answer"], name="example_assistant"),
                    ])
                return [
                    SystemMessage(instructions),
                    *examples,
                    HumanMessage(input_["question"]),
                ]

            llm = init_chat_model("gpt-4o", temperature=0)
            chain = example_selector | construct_prompt | llm
            chain.invoke({"question": "..."})
    """  # noqa: E501

    def __init__(
        self,
        *,
        k: int,
        dataset_name: Optional[str] = None,
        dataset_id: Optional[str] = None,
        client: Optional[Client] = None,
        **client_kwargs: Any,
    ) -> None:
        if client_kwargs and client:
            # A pre-built client and kwargs for building one are mutually
            # exclusive; specifying NEITHER is fine (a default Client is made).
            raise ValueError(
                "Must specify at most one of `client` and `client_kwargs`,"
                f" received both:\n{client=}\n\n{client_kwargs=}."
            )
        if not (dataset_name or dataset_id):
            # Fail fast: every operation on this selector needs a dataset, and
            # the documented contract requires one of these two identifiers.
            raise ValueError(
                "Must specify at least one of `dataset_name` and `dataset_id`."
            )
        self.k = k
        self.dataset_name = dataset_name
        self.dataset_id = dataset_id
        self._client = client or Client(**client_kwargs)

    def invoke(
        self,
        input: Dict[str, Any],
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Retrieve examples from the dataset that are most relevant to the input.

        Args:
            input: A dictionary of input variables.
            config: A config to use when invoking the Runnable.
                The config supports standard keys like 'tags', 'metadata' for tracing
                purposes, 'max_concurrency' for controlling how much work to do
                in parallel, and other keys. Please refer to the
                :class:`~langchain_core.runnables.config.RunnableConfig` for more
                details.
            kwargs: Additional keyword arguments are passed through to
                ``select_examples()``.

        Returns:
            A dictionary with the inputs plus an "examples" key which contains a list
            of the retrieved examples. Each example is a dictionary with top-level
            "inputs" and "outputs" keys which contain a mapping of the input
            and output variables to their example values.
        """
        # _call_with_config wires select_examples into the tracing/callback
        # machinery of the Runnable interface.
        return {
            **input,
            "examples": self._call_with_config(
                self.select_examples,  # type: ignore[arg-type]
                input,
                config,
                **kwargs,
            ),
        }

    async def ainvoke(
        self,
        input: Dict[str, Any],
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Retrieve examples from the dataset that are most relevant to the input.

        Args:
            input: A dictionary of input variables.
            config: A config to use when invoking the Runnable.
                The config supports standard keys like 'tags', 'metadata' for tracing
                purposes, 'max_concurrency' for controlling how much work to do
                in parallel, and other keys. Please refer to the
                :class:`~langchain_core.runnables.config.RunnableConfig` for more
                details.
            kwargs: Additional keyword arguments are passed through to
                ``select_examples()``.

        Returns:
            A dictionary with the inputs plus an "examples" key which contains a list
            of the retrieved examples. Each example is a dictionary with top-level
            "inputs" and "outputs" keys which contain a mapping of the input
            and output variables to their example values.
        """
        return {
            **input,
            "examples": await self._acall_with_config(
                self.aselect_examples,  # type: ignore[arg-type]
                input,
                config,
                **kwargs,
            ),
        }

    def select_examples(self, input_variables: Dict[str, str]) -> List[Dict[str, Any]]:
        """Select which examples to use based on the inputs.

        Args:
            input_variables: A dict that maps input variables to their values.

        Returns:
            A list of examples. Each example is a dictionary with a top level "inputs"
            and "outputs" key. The values of these are dictionaries which map the
            input and output variables to their example values.
        """
        search_req_json = json.dumps({"inputs": input_variables, "limit": self.k})
        dataset_id = self._get_dataset_id()
        few_shot_resp = self._client.request_with_retries(
            "POST",
            f"/datasets/{dataset_id}/search",
            headers={**self._client._headers, "Content-Type": "application/json"},
            data=search_req_json,
        )
        ls_utils.raise_for_status_with_text(few_shot_resp)
        return few_shot_resp.json()["examples"]

    def add_examples(self, examples: List[Dict[str, Dict]]) -> Any:
        """Add new examples to store.

        Args:
            examples: A list of dicts. Each dict must have a top-level "inputs" key
                that contains the inputs for the example and an "outputs" key that
                contains the outputs for the example.

        Returns:
            None

        Raises:
            ValueError: if dataset with (self.dataset_name, self.dataset_id) does not
                exist yet.
        """
        if not self.dataset_exists():
            # Only mention the identifiers that were actually configured; the
            # f-strings are built conditionally so "dataset_id=None" never
            # appears in the error message.
            identifiers = ", ".join(
                arg
                for arg in (
                    f"dataset_name={self.dataset_name}" if self.dataset_name else "",
                    f"dataset_id={self.dataset_id}" if self.dataset_id else "",
                )
                if arg
            )
            raise ValueError(
                f"Dataset with {identifiers} does not exist yet."
                f" Please run `LangSmithExampleSelector(...).create_dataset(...)` to"
                f" create this dataset and then try adding examples."
            )
        self._client.create_examples(
            dataset_name=self.dataset_name,
            dataset_id=self.dataset_id,
            inputs=[e["inputs"] for e in examples],
            outputs=[e["outputs"] for e in examples],
        )

    def add_example(self, example: Dict[str, Dict]) -> Any:
        """Add new example to store.

        Args:
            example: A dict that must have a top-level "inputs" key
                that contains the inputs for the example and an "outputs" key that
                contains the outputs for the example.
        """
        return self.add_examples([example])

    def create_dataset(
        self,
        input_schema: Union[Dict, Type],
        output_schema: Union[Dict, Type],
        *,
        description: Optional[str] = None,
    ) -> str:
        """Create a dataset to index examples into and retrieve examples from.

        Args:
            input_schema: The expected schema for all example inputs, passed in as a
                JSON Schema or a TypedDict class.
            output_schema: The expected schema for all example outputs, passed in as a
                JSON Schema or a TypedDict class.
            description: An optional description for the dataset.

        Returns:
            String dataset ID of the newly created dataset. If
            ``LangSmithExampleSelector.dataset_id`` is set, this will be the ID that's
            returned.

        Raises:
            ValueError: If ``LangSmithExampleSelector.dataset_name`` isn't set.
        """
        if not self.dataset_name:
            raise ValueError(
                "`LangSmithExampleSelector.dataset_name` must be set to be able to "
                "create a dataset. Please initialize a new example selector with "
                "init arg `dataset_name` passed in."
            )
        dataset_to_create_json = {
            "name": self.dataset_name,
            "description": description or "Dataset of indexed few-shot examples.",
            "data_type": "kv",
            "inputs_schema_definition": _convert_to_json_schema(input_schema),
            "outputs_schema_definition": _convert_to_json_schema(output_schema),
            "id": self.dataset_id,
        }
        # Create dataset.
        headers = {**self._client._headers, "Content-Type": "application/json"}
        resp = self._client.request_with_retries(
            "POST",
            "/datasets",
            headers=headers,
            data=json.dumps(dataset_to_create_json),
        )
        ls_utils.raise_for_status_with_text(resp)
        dataset = ls_schemas.Dataset(
            **resp.json(),
            _host_url=self._client._host_url,
            _tenant_id=self._client._get_optional_tenant_id(),
        )
        dataset_id = str(dataset.id)
        # Turn on dataset indexing so the search endpoint can serve it.
        resp = self._client.request_with_retries(
            "POST",
            f"/datasets/{dataset_id}/index",
            headers=headers,
            data=json.dumps({"tag": "latest"}),
        )
        ls_utils.raise_for_status_with_text(resp)
        return dataset_id

    def dataset_exists(self) -> bool:
        """Returns True if the configured dataset already exists, otherwise False."""
        return self._client.has_dataset(
            dataset_name=self.dataset_name, dataset_id=self.dataset_id
        )

    def _get_dataset_id(self) -> str:
        # Prefer the explicit ID; otherwise resolve the name via the API.
        if self.dataset_id:
            return self.dataset_id
        else:
            dataset = self._client.read_dataset(dataset_name=self.dataset_name)
            return str(dataset.id)
def _convert_to_json_schema(schema: Union[Dict, Type]) -> Dict:
    """Convert a JSON Schema dict or TypedDict class into a JSON Schema dict."""
    # TODO: Flip so that there's a generic convert_to_json_schema function that
    # convert_to_openai_function uses.
    oai_fn = convert_to_openai_function(schema)
    json_schema = {
        "title": oai_fn["name"],
        "description": oai_fn["description"],
    }
    # Parameter fields are merged last, matching the original spread order.
    json_schema.update(oai_fn["parameters"])
    return json_schema

View File

@@ -2,6 +2,7 @@ from langchain_core.example_selectors import __all__
EXPECTED_ALL = [
"BaseExampleSelector",
"LangSmithExampleSelector",
"LengthBasedExampleSelector",
"MaxMarginalRelevanceExampleSelector",
"SemanticSimilarityExampleSelector",