community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)

Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
2025-09-23 11:30:37 +00:00 · 2023-12-11 13:53:30 -08:00
parent c0f4b95aa9
commit ed58eeb9c5
2446 changed files with 171805 additions and 137118 deletions
--- a/libs/community/langchain_community/retrievers/svm.py
+++ b/libs/community/langchain_community/retrievers/svm.py
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import concurrent.futures
+from typing import Any, Iterable, List, Optional
+
+import numpy as np
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.retrievers import BaseRetriever
+
+
+def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
+    """
+    Create an index of embeddings for a list of contexts.
+
+    Args:
+        contexts: List of contexts to embed.
+        embeddings: Embeddings model to use.
+
+    Returns:
+        Index of embeddings.
+    """
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        return np.array(list(executor.map(embeddings.embed_query, contexts)))
+
+
+class SVMRetriever(BaseRetriever):
+    """`SVM` retriever.
+
+    Largely based on
+    https://github.com/karpathy/randomfun/blob/master/knn_vs_svm.ipynb
+    """
+
+    embeddings: Embeddings
+    """Embeddings model to use."""
+    index: Any
+    """Index of embeddings."""
+    texts: List[str]
+    """List of texts to index."""
+    metadatas: Optional[List[dict]] = None
+    """List of metadatas corresponding with each text."""
+    k: int = 4
+    """Number of results to return."""
+    relevancy_threshold: Optional[float] = None
+    """Threshold for relevancy."""
+
+    class Config:
+
+        """Configuration for this pydantic object."""
+
+        arbitrary_types_allowed = True
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embeddings: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> SVMRetriever:
+        index = create_index(texts, embeddings)
+        return cls(
+            embeddings=embeddings,
+            index=index,
+            texts=texts,
+            metadatas=metadatas,
+            **kwargs,
+        )
+
+    @classmethod
+    def from_documents(
+        cls,
+        documents: Iterable[Document],
+        embeddings: Embeddings,
+        **kwargs: Any,
+    ) -> SVMRetriever:
+        texts, metadatas = zip(*((d.page_content, d.metadata) for d in documents))
+        return cls.from_texts(
+            texts=texts, embeddings=embeddings, metadatas=metadatas, **kwargs
+        )
+
+    def _get_relevant_documents(
+        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
+    ) -> List[Document]:
+        try:
+            from sklearn import svm
+        except ImportError:
+            raise ImportError(
+                "Could not import scikit-learn, please install with `pip install "
+                "scikit-learn`."
+            )
+
+        query_embeds = np.array(self.embeddings.embed_query(query))
+        x = np.concatenate([query_embeds[None, ...], self.index])
+        y = np.zeros(x.shape[0])
+        y[0] = 1
+
+        clf = svm.LinearSVC(
+            class_weight="balanced", verbose=False, max_iter=10000, tol=1e-6, C=0.1
+        )
+        clf.fit(x, y)
+
+        similarities = clf.decision_function(x)
+        sorted_ix = np.argsort(-similarities)
+
+        # svm.LinearSVC in scikit-learn is non-deterministic.
+        # if a text is the same as a query, there is no guarantee
+        # the query will be in the first index.
+        # this performs a simple swap, this works because anything
+        # left of the 0 should be equivalent.
+        zero_index = np.where(sorted_ix == 0)[0][0]
+        if zero_index != 0:
+            sorted_ix[0], sorted_ix[zero_index] = sorted_ix[zero_index], sorted_ix[0]
+
+        denominator = np.max(similarities) - np.min(similarities) + 1e-6
+        normalized_similarities = (similarities - np.min(similarities)) / denominator
+
+        top_k_results = []
+        for row in sorted_ix[1 : self.k + 1]:
+            if (
+                self.relevancy_threshold is None
+                or normalized_similarities[row] >= self.relevancy_threshold
+            ):
+                metadata = self.metadatas[row - 1] if self.metadatas else {}
+                doc = Document(page_content=self.texts[row - 1], metadata=metadata)
+                top_k_results.append(doc)
+        return top_k_results