mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 06:53:16 +00:00
core[minor]: Add Graph Store component (#23092)
This PR introduces a GraphStore component. GraphStore extends VectorStore with the concept of links between documents based on document metadata. This allows linking documents based on a variety of techniques, including common keywords, explicit links in the content, and other patterns. This works with existing Documents, so it’s easy to extend existing VectorStores to be used as GraphStores. The interface can be implemented for any Vector Store technology that supports metadata, not only graph DBs. When retrieving documents for a given query, the first level of search is done using classical similarity search. Next, links may be followed using various traversal strategies to get additional documents. This allows documents to be retrieved that aren’t directly similar to the query but contain relevant information. Two retrieval methods are added on top of the existing VectorStore ones: * traversal_search, which gets all linked documents up to a certain depth; * mmr_traversal_search, which selects linked documents using an MMR algorithm to produce more diverse results. If a retrieval depth of 0 is used, GraphStore is effectively a VectorStore. This enables an easy transition from a simple VectorStore to a GraphStore by adding links between documents as a second step. An implementation for Apache Cassandra is also proposed. See https://github.com/datastax/ragstack-ai/blob/main/libs/knowledge-store/notebooks/astra_support.ipynb for a notebook that explains how to use GraphStore and shows that it can correctly answer questions that a simple VectorStore cannot. **Twitter handle:** _cbornet
This commit is contained in:
committed by
GitHub
parent
77f5fc3d55
commit
42d049f618
59
libs/core/tests/unit_tests/test_graph_vectorstores.py
Normal file
59
libs/core/tests/unit_tests/test_graph_vectorstores.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import pytest
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.graph_vectorstores.base import (
|
||||
Node,
|
||||
_documents_to_nodes,
|
||||
_texts_to_nodes,
|
||||
)
|
||||
from langchain_core.graph_vectorstores.links import Link
|
||||
|
||||
|
||||
def test_texts_to_nodes() -> None:
    """Exercise ``_texts_to_nodes`` with matching and mismatched inputs.

    The first part compares the produced nodes against expected ``Node``
    values for each combination of supplied/omitted metadata and ids,
    including a ``"links"`` metadata entry. The second part checks that
    inputs of differing lengths raise ``ValueError``.
    """
    # Texts, metadata and ids are all supplied.
    fully_specified = list(
        _texts_to_nodes(["a", "b"], [{"a": "b"}, {"c": "d"}], ["a", "b"])
    )
    assert fully_specified == [
        Node(id="a", metadata={"a": "b"}, text="a"),
        Node(id="b", metadata={"c": "d"}, text="b"),
    ]

    # Metadata omitted: the expected nodes carry an empty metadata dict.
    without_metadata = list(_texts_to_nodes(["a", "b"], None, ["a", "b"]))
    assert without_metadata == [
        Node(id="a", metadata={}, text="a"),
        Node(id="b", metadata={}, text="b"),
    ]

    # Ids omitted: the expected nodes are built without an explicit id.
    without_ids = list(_texts_to_nodes(["a", "b"], [{"a": "b"}, {"c": "d"}], None))
    assert without_ids == [
        Node(metadata={"a": "b"}, text="a"),
        Node(metadata={"c": "d"}, text="b"),
    ]

    # A "links" metadata entry (given here as a set) is expected to show up
    # on the node's ``links`` field as a list.
    with_links = list(
        _texts_to_nodes(
            ["a"],
            [{"links": {Link.incoming(kind="hyperlink", tag="http://b")}}],
            None,
        )
    )
    assert with_links == [
        Node(links=[Link.incoming(kind="hyperlink", tag="http://b")], text="a")
    ]

    # Each (texts, metadatas, ids) triple below has a length mismatch and
    # must raise ValueError once the result is consumed.
    mismatched_inputs = [
        (["a", "b"], None, ["a"]),
        (["a", "b"], [{"a": "b"}], None),
        (["a"], [{"a": "b"}, {"c": "d"}], None),
        (["a"], None, ["a", "b"]),
    ]
    for texts, metadatas, ids in mismatched_inputs:
        with pytest.raises(ValueError):
            list(_texts_to_nodes(texts, metadatas, ids))
|
||||
|
||||
|
||||
def test_documents_to_nodes() -> None:
    """Check that ``_documents_to_nodes`` maps documents onto expected nodes.

    One document carries a ``"links"`` metadata entry and is expected to
    yield a node with empty metadata and the link on ``links``; the other
    carries plain metadata that is expected to pass through unchanged.
    """
    linked_document = Document(
        id="a",
        page_content="some text a",
        metadata={"links": [Link.incoming(kind="hyperlink", tag="http://b")]},
    )
    plain_document = Document(id="b", page_content="some text b", metadata={"c": "d"})

    # The expected link is constructed separately from the input one so the
    # comparison is by value rather than by object identity.
    expected_nodes = [
        Node(
            id="a",
            metadata={},
            links=[Link.incoming(kind="hyperlink", tag="http://b")],
            text="some text a",
        ),
        Node(id="b", metadata={"c": "d"}, text="some text b"),
    ]

    produced_nodes = list(_documents_to_nodes([linked_document, plain_document]))
    assert produced_nodes == expected_nodes
|
Reference in New Issue
Block a user