mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-31 16:08:59 +00:00 
			
		
		
		
	Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
		
			
				
	
	
		
			245 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			245 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Wrapper around LLMRails vector database."""
 | |
| from __future__ import annotations
 | |
| 
 | |
| import json
 | |
| import logging
 | |
| import os
 | |
| import uuid
 | |
| from typing import Any, Iterable, List, Optional, Tuple
 | |
| 
 | |
| import requests
 | |
| from langchain_core.documents import Document
 | |
| from langchain_core.embeddings import Embeddings
 | |
| from langchain_core.pydantic_v1 import Field
 | |
| from langchain_core.vectorstores import VectorStore, VectorStoreRetriever
 | |
| 
 | |
| 
 | |
class LLMRails(VectorStore):
    """Implementation of Vector Store using LLMRails.

    LLMRails computes embeddings server-side, so no ``Embeddings`` object is
    needed; the ``embedding`` argument on :meth:`from_texts` exists only to
    satisfy the ``VectorStore`` interface and is ignored.

     See https://llmrails.com/

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import LLMRails

            vectorstore = LLMRails(
                api_key=llm_rails_api_key,
                datastore_id=datastore_id
            )
    """

    def __init__(
        self,
        datastore_id: Optional[str] = None,
        api_key: Optional[str] = None,
    ):
        """Initialize with LLMRails API.

        Args:
            datastore_id: Target datastore id. Falls back to the
                ``LLM_RAILS_DATASTORE_ID`` environment variable.
            api_key: LLMRails API key. Falls back to the
                ``LLM_RAILS_API_KEY`` environment variable.
        """
        self._datastore_id = datastore_id or os.environ.get("LLM_RAILS_DATASTORE_ID")
        self._api_key = api_key or os.environ.get("LLM_RAILS_API_KEY")
        if self._api_key is None:
            # Best-effort: requests will fail later with a 401 if this is wrong.
            logging.warning("Can't find Rails credentials in environment.")

        self._session = requests.Session()  # to reuse connections
        # NOTE(review): deliberately kept as the raw constructor argument for
        # backward compatibility, so it may be None even when the env var
        # resolved above. All API calls use the resolved ``_datastore_id``.
        self.datastore_id = datastore_id
        self.base_url = "https://api.llmrails.com/v1"

    def _get_post_headers(self) -> dict:
        """Returns headers that should be attached to each post request."""
        return {"X-API-KEY": self._api_key}

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Each text is uploaded as a separate document named with a fresh UUID.
        On the first failed upload the error is logged and the names collected
        so far are returned, i.e. the result may be a partial list.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Accepted for interface compatibility; not sent to the
                API by this implementation.

        Returns:
            List of ids from adding the texts into the vectorstore.

        """
        names: List[str] = []
        for text in texts:
            doc_name = str(uuid.uuid4())
            response = self._session.post(
                f"{self.base_url}/datastores/{self._datastore_id}/text",
                json={"name": doc_name, "text": text},
                verify=True,
                headers=self._get_post_headers(),
            )

            if response.status_code != 200:
                logging.error(
                    f"Create request failed for doc_name = {doc_name} with status code "
                    f"{response.status_code}, reason {response.reason}, text "
                    f"{response.text}"
                )

                # Stop on first failure; caller gets whatever succeeded so far.
                return names

            names.append(doc_name)

        return names

    def add_files(
        self,
        files_list: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> bool:
        """
        LLMRails provides a way to add documents directly via our API where
        pre-processing and chunking occurs internally in an optimal way
        This method provides a way to use that API in LangChain

        Args:
            files_list: Iterable of strings, each representing a local file path.
                    Files could be text, HTML, PDF, markdown, doc/docx, ppt/pptx, etc.
                    see API docs for full list
            metadatas: Accepted for interface compatibility; not sent to the
                API by this implementation.

        Returns:
            True if the upload request succeeded, False otherwise.
        """
        files = []

        for file in files_list:
            if not os.path.exists(file):
                logging.error(f"File {file} does not exist, skipping")
                continue

            # Read the file eagerly so the handle is closed before the request
            # is made (previously open handles were passed and never closed,
            # leaking file descriptors).
            with open(file, "rb") as f:
                files.append(("file", (os.path.basename(file), f.read())))

        response = self._session.post(
            f"{self.base_url}/datastores/{self._datastore_id}/file",
            files=files,
            verify=True,
            headers=self._get_post_headers(),
        )

        if response.status_code != 200:
            logging.error(
                f"Create request failed for datastore = {self._datastore_id} "
                f"with status code {response.status_code}, reason {response.reason}, "
                f"text {response.text}"
            )

            return False

        return True

    def similarity_search_with_score(
        self, query: str, k: int = 5
    ) -> List[Tuple[Document, float]]:
        """Return LLMRails documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 5 Max 10.

        Returns:
            List of Documents most similar to the query and score for each.
            Returns an empty list (after logging) when the search request
            fails.
        """
        response = self._session.post(
            headers=self._get_post_headers(),
            url=f"{self.base_url}/datastores/{self._datastore_id}/search",
            data=json.dumps({"k": k, "text": query}),
            timeout=10,
        )

        if response.status_code != 200:
            logging.error(
                "Query failed %s",
                f"(code {response.status_code}, reason {response.reason}, details "
                f"{response.text})",
            )
            return []

        results = response.json()["results"]
        # The API reports the relevance score inside each result's metadata;
        # strip it from the Document metadata and surface it as the tuple's
        # second element instead.
        docs = [
            (
                Document(
                    page_content=x["text"],
                    metadata={
                        key: value
                        for key, value in x["metadata"].items()
                        if key != "score"
                    },
                ),
                x["metadata"]["score"],
            )
            for x in results
        ]

        return docs

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return LLMRails documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of Documents most similar to the query
        """
        docs_and_scores = self.similarity_search_with_score(query, k=k)

        return [doc for doc, _ in docs_and_scores]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Optional[Embeddings] = None,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> LLMRails:
        """Construct LLMRails wrapper from raw documents.
        This is intended to be a quick way to get started.

        Args:
            texts: Texts to index.
            embedding: Ignored — LLMRails generates its own embeddings; the
                parameter exists only to satisfy the ``VectorStore`` interface.
            metadatas: Accepted for interface compatibility; not used.
            **kwargs: Forwarded to the ``LLMRails`` constructor
                (``datastore_id``, ``api_key``).

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import LLMRails
                llm_rails = LLMRails.from_texts(
                    texts,
                    datastore_id=datastore_id,
                    api_key=llm_rails_api_key
                )
        """
        # Note: LLMRails generates its own embeddings, so we ignore the provided
        # embeddings (required by interface)
        llm_rails = cls(**kwargs)
        llm_rails.add_texts(texts)
        return llm_rails

    def as_retriever(self, **kwargs: Any) -> LLMRailsRetriever:
        """Return an ``LLMRailsRetriever`` wrapping this vectorstore."""
        return LLMRailsRetriever(vectorstore=self, **kwargs)
 | |
| 
 | |
| 
 | |
class LLMRailsRetriever(VectorStoreRetriever):
    """Retriever for LLMRails.

    A thin ``VectorStoreRetriever`` whose ``vectorstore`` is an ``LLMRails``
    instance; defaults to returning 5 documents per query via
    ``search_kwargs``.
    """

    vectorstore: LLMRails
    search_kwargs: dict = Field(default_factory=lambda: {"k": 5})
    """Search params.
        k: Number of Documents to return. Defaults to 5.
        alpha: parameter for hybrid search .
    """

    def add_texts(self, texts: List[str]) -> None:
        """Add text to the datastore.

        Delegates to ``LLMRails.add_texts``; the list of created document ids
        is discarded.

        Args:
            texts (List[str]): The text
        """
        self.vectorstore.add_texts(texts)
 |