LLMRails (#10796)

### LLMRails Integration

This PR provides integration with LLMRails. Implemented here are:

- langchain/vectorstores/llm_rails.py
- tests/integration_tests/vectorstores/test_llm_rails.py
- docs/extras/integrations/vectorstores/llm-rails.ipynb

---------

Co-authored-by: Anar Aliyev <aaliyev@mgmt.cloudnet.services>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-09-09 23:12:38 +00:00 · 2023-09-20 07:33:33 +04:00
parent 900dbd1cbe
commit c656a6b966
4 changed files with 568 additions and 0 deletions
--- a/libs/langchain/langchain/vectorstores/__init__.py
+++ b/libs/langchain/langchain/vectorstores/__init__.py
@@ -46,6 +46,7 @@ from langchain.vectorstores.epsilla import Epsilla
 from langchain.vectorstores.faiss import FAISS
 from langchain.vectorstores.hologres import Hologres
 from langchain.vectorstores.lancedb import LanceDB
+from langchain.vectorstores.llm_rails import LLMRails
 from langchain.vectorstores.marqo import Marqo
 from langchain.vectorstores.matching_engine import MatchingEngine
 from langchain.vectorstores.meilisearch import Meilisearch
@@ -107,6 +108,7 @@ __all__ = [
    "FAISS",
    "Hologres",
    "LanceDB",
+    "LLMRails",
    "Marqo",
    "MatchingEngine",
    "Meilisearch",
--- a/libs/langchain/langchain/vectorstores/llm_rails.py
+++ b/libs/langchain/langchain/vectorstores/llm_rails.py
@@ -0,0 +1,203 @@
+"""Wrapper around LLMRails vector database."""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import uuid
+from enum import Enum
+from typing import Any, Iterable, List, Optional, Tuple
+
+import requests
+
+from langchain.pydantic_v1 import Field
+from langchain.schema import Document
+from langchain.schema.embeddings import Embeddings
+from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
+
+
+class ModelChoices(str, Enum):
+    """Embedding model names as string-valued enum members.
+
+    NOTE(review): not referenced anywhere else in this module as shown;
+    presumably these are model identifiers accepted by the LLMRails API --
+    confirm before relying on them.
+    """
+
+    embedding_english_v1 = "embedding-english-v1"
+    embedding_multi_v1 = "embedding-multi-v1"
+
+
+class LLMRails(VectorStore):
+    """Implementation of Vector Store using LLMRails (https://llmrails.com/).
+
+    Texts are sent to the LLMRails REST API, which stores them in a datastore
+    and answers similarity queries. LLMRails generates its own embeddings, so
+    no ``Embeddings`` object is needed.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.vectorstores import LLMRails
+
+            vectorstore = LLMRails(
+                api_key=llm_rails_api_key,
+                datastore_id=datastore_id
+            )
+    """
+
+    def __init__(
+        self,
+        datastore_id: Optional[str] = None,
+        api_key: Optional[str] = None,
+    ):
+        """Initialize with LLMRails API.
+
+        Args:
+            datastore_id: Id of the datastore to use. Falls back to the
+                ``LLM_RAILS_DATASTORE_ID`` environment variable.
+            api_key: LLMRails API key. Falls back to the ``LLM_RAILS_API_KEY``
+                environment variable; a warning is logged if neither is set.
+        """
+        self._datastore_id = datastore_id or os.environ.get("LLM_RAILS_DATASTORE_ID")
+        self._api_key = api_key or os.environ.get("LLM_RAILS_API_KEY")
+        if self._api_key is None:
+            logging.warning("Can't find Rails credentials in environment.")
+
+        self._session = requests.Session()  # to reuse connections
+        # Expose the *resolved* id so the public attribute agrees with the one
+        # used in request URLs, even when it came from the environment.
+        self.datastore_id = self._datastore_id
+        self.base_url = "https://api.llmrails.com/v1"
+
+    def _get_post_headers(self) -> dict:
+        """Returns headers that should be attached to each post request."""
+        return {
+            "X-API-KEY": self._api_key,
+            "Content-Type": "application/json",
+        }
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Upload texts to the LLMRails datastore.
+
+        Each text is posted as its own document under a freshly generated UUID
+        name. Uploading stops at the first request that does not return
+        HTTP 200: the failure is logged and the names added so far are
+        returned, so the result may be shorter than ``texts``.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Accepted for interface compatibility; currently not
+                sent to LLMRails.
+            **kwargs: Accepted for interface compatibility; ignored.
+
+        Returns:
+            List of document names (UUID strings) that were added successfully.
+
+        """
+        names: List[str] = []
+        for text in texts:
+            doc_name = str(uuid.uuid4())
+            response = self._session.post(
+                f"{self.base_url}/datastores/{self._datastore_id}/text",
+                json={"name": doc_name, "text": text},
+                verify=True,
+                headers=self._get_post_headers(),
+            )
+
+            if response.status_code != 200:
+                logging.error(
+                    f"Create request failed for doc_name = {doc_name} with status code "
+                    f"{response.status_code}, reason {response.reason}, text "
+                    f"{response.text}"
+                )
+
+                # Best effort: report what was stored before the failure.
+                return names
+
+            names.append(doc_name)
+
+        return names
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 5
+    ) -> List[Tuple[Document, float]]:
+        """Return LLMRails documents most similar to query, along with scores.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 5, max 10.
+
+        Returns:
+            List of (Document, score) tuples most similar to the query. An
+            empty list is returned (and the error logged) if the request does
+            not return HTTP 200.
+        """
+        response = self._session.post(
+            f"{self.base_url}/datastores/{self._datastore_id}/search",
+            json={"k": k, "text": query},
+            headers=self._get_post_headers(),
+            timeout=10,
+        )
+
+        if response.status_code != 200:
+            logging.error(
+                "Query failed %s",
+                f"(code {response.status_code}, reason {response.reason}, details "
+                f"{response.text})",
+            )
+            return []
+
+        results = response.json()["results"]
+        # The score travels inside each result's metadata; split it out so the
+        # Document metadata holds only the remaining keys.
+        docs = [
+            (
+                Document(
+                    page_content=x["text"],
+                    metadata={
+                        key: value
+                        for key, value in x["metadata"].items()
+                        if key != "score"
+                    },
+                ),
+                x["metadata"]["score"],
+            )
+            for x in results
+        ]
+
+        return docs
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return LLMRails documents most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            **kwargs: Accepted for interface compatibility; ignored.
+
+        Returns:
+            List of Documents most similar to the query
+        """
+        docs_and_scores = self.similarity_search_with_score(query, k=k)
+
+        return [doc for doc, _ in docs_and_scores]
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Optional[Embeddings] = None,
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> LLMRails:
+        """Construct LLMRails wrapper from raw documents.
+        This is intended to be a quick way to get started.
+
+        Args:
+            texts: Texts to upload to the datastore.
+            embedding: Ignored; present only because the interface requires it.
+            metadatas: Ignored; not sent to LLMRails.
+            **kwargs: Forwarded to the constructor (``datastore_id``,
+                ``api_key``).
+
+        Returns:
+            The new LLMRails instance, after the texts have been uploaded.
+
+        Example:
+            .. code-block:: python
+
+                from langchain.vectorstores import LLMRails
+                llm_rails = LLMRails.from_texts(
+                    texts,
+                    datastore_id=datastore_id,
+                    api_key=llm_rails_api_key
+                )
+        """
+        # Note: LLMRails generates its own embeddings, so we ignore the provided
+        # embeddings (required by interface)
+        llm_rails = cls(**kwargs)
+        llm_rails.add_texts(texts)
+        return llm_rails
+
+    def as_retriever(self, **kwargs: Any) -> LLMRailsRetriever:
+        """Return an LLMRailsRetriever bound to this store.
+
+        Args:
+            **kwargs: Forwarded to the LLMRailsRetriever constructor.
+        """
+        return LLMRailsRetriever(vectorstore=self, **kwargs)
+
+
+class LLMRailsRetriever(VectorStoreRetriever):
+    """Retriever backed by an LLMRails vector store."""
+
+    vectorstore: LLMRails
+    search_kwargs: dict = Field(default_factory=lambda: {"k": 5})
+    """Search params.
+        k: Number of Documents to return. Defaults to 5.
+
+        Other keys have no effect: LLMRails.similarity_search only forwards
+        ``k`` to the search request.
+    """
+
+    def add_texts(self, texts: List[str]) -> None:
+        """Add text to the datastore.
+
+        Delegates to ``vectorstore.add_texts``; the list of document names it
+        returns is discarded.
+
+        Args:
+            texts (List[str]): The text
+        """
+        self.vectorstore.add_texts(texts)
--- a/libs/langchain/tests/integration_tests/vectorstores/test_llm_rails.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_llm_rails.py
@@ -0,0 +1,35 @@
+from langchain.vectorstores.llm_rails import LLMRails
+
+#
+# For this test to run properly, please set up as follows:
+# 1. Create an LLMRails account: sign up at https://console.llmrails.com/signup
+# 2. Create an API key with permissions for query and indexing
+# 3. Create a datastore and get its id from the datastore settings
+# 4. Set the environment variables:
+#   LLM_RAILS_API_KEY, LLM_RAILS_DATASTORE_ID
+#
+
+
+def test_llm_rails_add_documents() -> None:
+    """Test end to end construction and search."""
+
+    # create a new LLMRails instance; the API key and datastore id are read
+    # from the LLM_RAILS_API_KEY / LLM_RAILS_DATASTORE_ID environment variables
+    docsearch: LLMRails = LLMRails()
+
+    # start with some initial texts, added with add_texts
+    texts1 = ["large language model", "information retrieval", "question answering"]
+    added = docsearch.add_texts(texts1)
+
+    # add_texts stops at the first failed upload, so a short result means the
+    # datastore was not fully populated and the searches below are meaningless
+    assert len(added) == len(texts1)
+
+    # plain similarity search
+    output1 = docsearch.similarity_search("large language model", k=1)
+
+    assert len(output1) == 1
+    assert output1[0].page_content == "large language model"
+
+    # similarity search that also returns the score
+    output2 = docsearch.similarity_search_with_score("large language model", k=1)
+
+    assert len(output2) == 1
+    assert output2[0][0].page_content == "large language model"
+    assert output2[0][1] > 0