mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-19 19:11:33 +00:00
community[patch]: Add docstring for Links (#25969)
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
19ed3165fb
commit
2a6abd3f0a
@ -9,10 +9,113 @@ from langchain_core.documents import Document
|
|||||||
@beta()
|
@beta()
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Link:
|
class Link:
|
||||||
"""A link to/from a tag of a given tag.
|
"""A link to/from a tag of a given kind.
|
||||||
|
|
||||||
Edges exist from nodes with an outgoing link to nodes with a matching incoming link.
|
Documents in a :class:`graph vector store <langchain_community.graph_vectorstores.base.GraphVectorStore>`
|
||||||
"""
|
are connected via "links".
|
||||||
|
Links form a bipartite graph between documents and tags: documents are connected
|
||||||
|
to tags, and tags are connected to other documents.
|
||||||
|
When documents are retrieved from a graph vector store, a pair of documents are
|
||||||
|
connected with a depth of one if both documents are connected to the same tag.
|
||||||
|
|
||||||
|
Links have a ``kind`` property, used to namespace different tag identifiers.
|
||||||
|
For example, a link to a keyword might use kind ``kw``, while a link to a URL might
|
||||||
|
use kind ``url``.
|
||||||
|
This allows the same tag value to be used in different contexts without causing
|
||||||
|
name collisions.
|
||||||
|
|
||||||
|
Links are directed. The directionality of links controls how the graph is
|
||||||
|
traversed at retrieval time.
|
||||||
|
For example, given documents ``A`` and ``B``, connected by links to tag ``T``:
|
||||||
|
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| A to T | B to T | Result |
|
||||||
|
+==========+==========+=================================+
|
||||||
|
| outgoing | incoming | Retrieval traverses from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| incoming | incoming | No traversal from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| outgoing | outgoing | No traversal from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| bidir | incoming | Retrieval traverses from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| bidir | outgoing | No traversal from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| outgoing | bidir | Retrieval traverses from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
| incoming | bidir | No traversal from A to B |
|
||||||
|
+----------+----------+---------------------------------+
|
||||||
|
|
||||||
|
Directed links make it possible to describe relationships such as term
|
||||||
|
references / definitions: term definitions are generally relevant to any documents
|
||||||
|
that use the term, but the full set of documents using a term generally aren't
|
||||||
|
relevant to the term's definition.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
- :mod:`How to use a graph vector store <langchain_community.graph_vectorstores>`
|
||||||
|
- :class:`How to link Documents on hyperlinks in HTML <langchain_community.graph_vectorstores.extractors.html_link_extractor.HtmlLinkExtractor>`
|
||||||
|
- :class:`How to link Documents on common keywords (using KeyBERT) <langchain_community.graph_vectorstores.extractors.keybert_link_extractor.KeybertLinkExtractor>`
|
||||||
|
- :class:`How to link Documents on common named entities (using GliNER) <langchain_community.graph_vectorstores.extractors.gliner_link_extractor.GLiNERLinkExtractor>`
|
||||||
|
|
||||||
|
How to add links to a Document
|
||||||
|
==============================
|
||||||
|
|
||||||
|
How to create links
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
You can create links using the Link class's constructors :meth:`incoming`,
|
||||||
|
:meth:`outgoing`, and :meth:`bidir`::
|
||||||
|
|
||||||
|
from langchain_community.graph_vectorstores.links import Link
|
||||||
|
|
||||||
|
print(Link.bidir(kind="location", tag="Paris"))
|
||||||
|
|
||||||
|
.. code-block:: output
|
||||||
|
|
||||||
|
Link(kind='location', direction='bidir', tag='Paris')
|
||||||
|
|
||||||
|
Extending documents with links
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Now that we know how to create links, let's associate them with some documents.
|
||||||
|
These edges will strengthen the connection between documents that share a keyword
|
||||||
|
when using a graph vector store to retrieve documents.
|
||||||
|
|
||||||
|
First, we'll load some text and chunk it into smaller pieces.
|
||||||
|
Then we'll add a link to each document to link them all together::
|
||||||
|
|
||||||
|
from langchain_community.document_loaders import TextLoader
|
||||||
|
from langchain_community.graph_vectorstores.links import add_links
|
||||||
|
from langchain_text_splitters import CharacterTextSplitter
|
||||||
|
|
||||||
|
loader = TextLoader("state_of_the_union.txt")
|
||||||
|
|
||||||
|
raw_documents = loader.load()
|
||||||
|
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||||
|
documents = text_splitter.split_documents(raw_documents)
|
||||||
|
|
||||||
|
for doc in documents:
|
||||||
|
add_links(doc, Link.bidir(kind="genre", tag="oratory"))
|
||||||
|
|
||||||
|
print(documents[0].metadata)
|
||||||
|
|
||||||
|
.. code-block:: output
|
||||||
|
|
||||||
|
{'source': 'state_of_the_union.txt', 'links': [Link(kind='genre', direction='bidir', tag='oratory')]}
|
||||||
|
|
||||||
|
As we can see, each document's metadata now includes a bidirectional link to the
|
||||||
|
genre ``oratory``.
|
||||||
|
|
||||||
|
The documents can then be added to a graph vector store::
|
||||||
|
|
||||||
|
from langchain_community.graph_vectorstores import CassandraGraphVectorStore
|
||||||
|
|
||||||
|
graph_vectorstore = CassandraGraphVectorStore.from_documents(
|
||||||
|
documents=documents, embeddings=...
|
||||||
|
)
|
||||||
|
|
||||||
|
""" # noqa: E501
|
||||||
|
|
||||||
kind: str
|
kind: str
|
||||||
"""The kind of link. Allows different extractors to use the same tag name without
|
"""The kind of link. Allows different extractors to use the same tag name without
|
||||||
@ -24,17 +127,32 @@ class Link:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def incoming(kind: str, tag: str) -> "Link":
|
def incoming(kind: str, tag: str) -> "Link":
|
||||||
"""Create an incoming link."""
|
"""Create an incoming link.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
kind: the link kind.
|
||||||
|
tag: the link tag.
|
||||||
|
"""
|
||||||
return Link(kind=kind, direction="in", tag=tag)
|
return Link(kind=kind, direction="in", tag=tag)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def outgoing(kind: str, tag: str) -> "Link":
|
def outgoing(kind: str, tag: str) -> "Link":
|
||||||
"""Create an outgoing link."""
|
"""Create an outgoing link.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
kind: the link kind.
|
||||||
|
tag: the link tag.
|
||||||
|
"""
|
||||||
return Link(kind=kind, direction="out", tag=tag)
|
return Link(kind=kind, direction="out", tag=tag)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def bidir(kind: str, tag: str) -> "Link":
|
def bidir(kind: str, tag: str) -> "Link":
|
||||||
"""Create a bidirectional link."""
|
"""Create a bidirectional link.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
kind: the link kind.
|
||||||
|
tag: the link tag.
|
||||||
|
"""
|
||||||
return Link(kind=kind, direction="bidir", tag=tag)
|
return Link(kind=kind, direction="bidir", tag=tag)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user