mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 15:19:33 +00:00
community[patch], docs: Add API reference doc for GraphVectorStore (#25751)
This commit is contained in:
parent 28e2ec7603
commit 0a752a74cc
@@ -1,3 +1,161 @@

"""**Graph Vector Store**

Sometimes embedding models don’t capture all the important relationships between
documents.
Graph Vector Stores are an extension to both vector stores and retrievers that allow
documents to be explicitly connected to each other.

Graph vector store retrievers use both vector similarity and links to find documents
related to an unstructured query.

Graphs allow linking between documents.
Each document identifies tags that link to and from it.
For example, a paragraph of text may be linked to URLs based on the anchor tags in
its content and linked from the URL(s) it is published at.

Link extractors can be used to extract links from documents.

Example:

.. code-block:: python

    graph_vector_store = CassandraGraphVectorStore()
    link_extractor = HtmlLinkExtractor()
    links = link_extractor.extract_one(HtmlInput(document.page_content, "http://mysite"))
    add_links(document, links)
    graph_vector_store.add_document(document)
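Links can also be attached by hand when the relationships are already known. Below is a
minimal sketch of manual linking; it assumes the ``Link.outgoing`` / ``Link.incoming``
constructors on the ``Link`` class exported from
``langchain_community.graph_vectorstores.links``, and the ``kind``/``tag`` values are
purely illustrative:

.. code-block:: python

    from langchain_community.graph_vectorstores.links import Link, add_links

    add_links(
        document,
        # This document points out to a URL it references...
        Link.outgoing(kind="hyperlink", tag="https://some-url"),
        # ...and can be reached from other documents sharing this keyword tag.
        Link.incoming(kind="keyword", tag="graphs"),
    )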
***********
Get started
***********

We chunk the State of the Union text and split it into documents.

.. code-block:: python

    from langchain_community.document_loaders import TextLoader
    from langchain_text_splitters import CharacterTextSplitter

    raw_documents = TextLoader("state_of_the_union.txt").load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(raw_documents)

Links can be added to documents manually but it's easier to use a
:class:`~langchain_community.graph_vectorstores.extractors.LinkExtractor`.
Several common link extractors are available and you can build your own.
For this guide, we'll use the
:class:`~langchain_community.graph_vectorstores.extractors.KeybertLinkExtractor`,
which uses the KeyBERT model to tag documents with keywords and uses these keywords to
create links between documents.

.. code-block:: python

    from langchain_community.graph_vectorstores.extractors import KeybertLinkExtractor
    from langchain_community.graph_vectorstores.links import add_links

    extractor = KeybertLinkExtractor()

    for doc in documents:
        add_links(doc, extractor.extract_one(doc))
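To inspect what the extractor attached, the links on a document can be read back with
``get_links``, which is exported alongside ``add_links``:

.. code-block:: python

    from langchain_community.graph_vectorstores.links import get_links

    # Each link carries a kind, a direction, and a tag shared by related documents.
    for link in get_links(documents[0]):
        print(link)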
***********************************************
Create the graph vector store and add documents
***********************************************

We'll use an Apache Cassandra or Astra DB database as an example.
We create a :class:`~langchain_community.graph_vectorstores.CassandraGraphVectorStore`
from the documents and an :class:`~langchain_openai.OpenAIEmbeddings` model.

.. code-block:: python

    import cassio
    from langchain_community.graph_vectorstores import CassandraGraphVectorStore
    from langchain_openai import OpenAIEmbeddings

    # Initialize cassio and the Cassandra session from the environment variables
    cassio.init(auto=True)

    store = CassandraGraphVectorStore.from_documents(
        embedding=OpenAIEmbeddings(),
        documents=documents,
    )

*****************
Similarity search
*****************

If we don't traverse the graph, a graph vector store behaves like a regular vector
store.
So all methods available in a vector store are also available in a graph vector store.
The :meth:`~langchain_community.graph_vectorstores.base.GraphVectorStore.similarity_search`
method returns documents similar to a query without considering
the links between documents.

.. code-block:: python

    docs = store.similarity_search(
        "What did the president say about Ketanji Brown Jackson?"
    )

****************
Traversal search
****************

The :meth:`~langchain_community.graph_vectorstores.base.GraphVectorStore.traversal_search`
method returns documents similar to a query considering the links
between documents. It first does a similarity search and then traverses the graph to
find linked documents.

.. code-block:: python

    docs = list(
        store.traversal_search("What did the president say about Ketanji Brown Jackson?")
    )
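Traversal can be tuned. A rough sketch, assuming ``traversal_search`` accepts the same
``k`` and ``depth`` options that are documented for the retriever below:

.. code-block:: python

    docs = list(
        store.traversal_search(
            "What did the president say about Ketanji Brown Jackson?",
            k=4,      # how many documents to seed via similarity search
            depth=2,  # follow links up to two edges away from the seed documents
        )
    )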
*************
Async methods
*************

The graph vector store has async versions of the methods, prefixed with ``a``.

.. code-block:: python

    docs = [
        doc
        async for doc in store.atraversal_search(
            "What did the president say about Ketanji Brown Jackson?"
        )
    ]

****************************
Graph vector store retriever
****************************

The graph vector store can be converted to a retriever.
It is similar to the vector store retriever, but it also supports traversal search
types such as ``traversal`` and ``mmr_traversal``.

.. code-block:: python

    retriever = store.as_retriever(search_type="mmr_traversal")
    docs = retriever.invoke("What did the president say about Ketanji Brown Jackson?")
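The retriever accepts the options described in
:meth:`~langchain_community.graph_vectorstores.base.GraphVectorStore.as_retriever`.
A sketch of passing them explicitly (the values shown are illustrative):

.. code-block:: python

    retriever = store.as_retriever(
        search_type="traversal",
        search_kwargs={"k": 4, "depth": 2},
    )
    docs = retriever.invoke("What did the president say about Ketanji Brown Jackson?")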
""" # noqa: E501
|
||||||
|
|
||||||
|
from langchain_community.graph_vectorstores.base import (
|
||||||
|
GraphVectorStore,
|
||||||
|
GraphVectorStoreRetriever,
|
||||||
|
Node,
|
||||||
|
)
|
||||||
|
from langchain_community.graph_vectorstores.cassandra import CassandraGraphVectorStore
|
||||||
|
from langchain_community.graph_vectorstores.links import (
|
||||||
|
Link,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"GraphVectorStore",
|
||||||
|
"GraphVectorStoreRetriever",
|
||||||
|
"Node",
|
||||||
|
"Link",
|
||||||
|
"CassandraGraphVectorStore",
|
||||||
|
]
|
||||||
|
@@ -0,0 +1,7 @@

from langchain_core.graph_vectorstores.base import (
    GraphVectorStore,
    GraphVectorStoreRetriever,
    Node,
)

__all__ = ["GraphVectorStore", "GraphVectorStoreRetriever", "Node"]
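If the re-exporting module above is ``langchain_community.graph_vectorstores.base`` (the
path the package ``__init__`` imports from), then the same classes resolve from either
package path. A small illustration under that assumption:

.. code-block:: python

    # The community module simply re-exports the implementations that live in
    # langchain_core, so both names refer to the same class object.
    from langchain_core.graph_vectorstores.base import GraphVectorStore as CoreGVS
    from langchain_community.graph_vectorstores.base import GraphVectorStore as CommunityGVS

    assert CoreGVS is CommunityGVS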
@@ -0,0 +1,8 @@

from langchain_core.graph_vectorstores.links import (
    Link,
    add_links,
    copy_with_links,
    get_links,
)

__all__ = ["Link", "add_links", "get_links", "copy_with_links"]
@@ -38,10 +38,11 @@ class Node(Serializable):

    Edges exist from nodes with an outgoing link to nodes with a matching incoming link.

    For instance two nodes `a` and `b` connected over a hyperlink ``https://some-url``
    would look like:

    .. code-block:: python

        [
            Node(
                id="a",
@@ -118,6 +119,13 @@ def _documents_to_nodes(documents: Iterable[Document]) -> Iterator[Node]:

@beta()
def nodes_to_documents(nodes: Iterable[Node]) -> Iterator[Document]:
    """Convert nodes to documents.

    Args:
        nodes: The nodes to convert to documents.

    Returns:
        The documents generated from the nodes.
    """
    for node in nodes:
        metadata = node.metadata.copy()
        metadata[METADATA_LINKS_KEY] = [
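For orientation, a hedged usage sketch of the helper documented above; the import path
is assumed from the surrounding module, and ``nodes`` stands for any iterable of
``Node`` objects already in hand:

.. code-block:: python

    from langchain_core.graph_vectorstores.base import nodes_to_documents

    # Lazily converts each Node back into a Document, carrying its links in metadata.
    documents = list(nodes_to_documents(nodes))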
@@ -594,19 +602,24 @@ class GraphVectorStore(VectorStore):

        """Return GraphVectorStoreRetriever initialized from this GraphVectorStore.

        Args:
            **kwargs: Keyword arguments to pass to the search function.
                Can include:

                - search_type (Optional[str]): Defines the type of search that
                  the Retriever should perform.
                  Can be ``traversal`` (default), ``similarity``, ``mmr``, or
                  ``similarity_score_threshold``.
                - search_kwargs (Optional[Dict]): Keyword arguments to pass to the
                  search function. Can include things like:

                  - k(int): Amount of documents to return (Default: 4).
                  - depth(int): The maximum depth of edges to traverse (Default: 1).
                  - score_threshold(float): Minimum relevance threshold
                    for similarity_score_threshold.
                  - fetch_k(int): Amount of documents to pass to MMR algorithm
                    (Default: 20).
                  - lambda_mult(float): Diversity of results returned by MMR;
                    1 for minimum diversity and 0 for maximum. (Default: 0.5).

        Returns:
            Retriever for this GraphVectorStore.
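A hedged example of how the options documented above would be passed in practice (the
values are illustrative only):

.. code-block:: python

    retriever = store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4, "fetch_k": 20, "lambda_mult": 0.5},
    )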
@@ -43,6 +43,7 @@ METADATA_LINKS_KEY = "links"

@beta()
def get_links(doc: Document) -> List[Link]:
    """Get the links from a document.

    Args:
        doc: The document to get the link tags from.

    Returns:
@@ -60,6 +61,7 @@ def get_links(doc: Document) -> List[Link]:

@beta()
def add_links(doc: Document, *links: Union[Link, Iterable[Link]]) -> None:
    """Add links to the given metadata.

    Args:
        doc: The document to add the links to.
        *links: The links to add to the document.
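Because ``*links`` accepts both individual ``Link`` objects and iterables of links, one
call can mix hand-built links with extractor output. A brief sketch reusing names from
the examples above (``doc``, ``extractor``) and the assumed ``Link.outgoing`` helper:

.. code-block:: python

    add_links(
        doc,
        Link.outgoing(kind="hyperlink", tag="https://some-url"),  # a single Link
        extractor.extract_one(doc),  # an iterable of Links produced by an extractor
    )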