mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 06:39:52 +00:00
[community]: Render documents to graphviz (#24830)
- **Description:** Adds a helper that renders documents with the GraphVectorStore metadata fields to Graphviz for visualization. This is helpful for understanding and debugging. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
fc8006121f
commit
008efada2c
@ -29,6 +29,7 @@ gliner>=0.2.7
|
||||
google-cloud-documentai>=2.20.1,<3
|
||||
gql>=3.4.1,<4
|
||||
gradientai>=1.4.0,<2
|
||||
graphviz>=0.20.3,<0.21
|
||||
hdbcli>=2.19.21,<3
|
||||
hologres-vector==0.0.6
|
||||
html2text>=2020.1.16
|
||||
|
@ -0,0 +1,122 @@
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Dict, Iterable, Optional, Tuple
|
||||
|
||||
from langchain_core._api import beta
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.graph_vectorstores.links import get_links
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import graphviz
|
||||
|
||||
|
||||
def _escape_id(id: str) -> str:
|
||||
return id.replace(":", "_")
|
||||
|
||||
|
||||
_EDGE_DIRECTION = {
|
||||
"in": "back",
|
||||
"out": "forward",
|
||||
"bidir": "both",
|
||||
}
|
||||
|
||||
_WORD_RE = re.compile("\s*\S+")
|
||||
|
||||
|
||||
def _split_prefix(s: str, max_chars: int = 50) -> str:
|
||||
words = _WORD_RE.finditer(s)
|
||||
|
||||
split = min(len(s), max_chars)
|
||||
for word in words:
|
||||
if word.end(0) > max_chars:
|
||||
break
|
||||
split = word.end(0)
|
||||
|
||||
if split == len(s):
|
||||
return s
|
||||
else:
|
||||
return f"{s[0:split]}..."
|
||||
|
||||
|
||||
@beta()
def render_graphviz(
    documents: Iterable[Document],
    engine: Optional[str] = None,
    node_color: Optional[str] = None,
    node_colors: Optional[Dict[str, Optional[str]]] = None,
    skip_tags: Iterable[Tuple[str, str]] = (),
) -> "graphviz.Digraph":
    """Render a collection of GraphVectorStore documents to GraphViz format.

    Every document becomes a note-shaped node labelled with its ID and the
    beginning of its content. Every distinct ``(kind, tag)`` link becomes a
    node of its own, connected to each document carrying that link by an edge
    whose ``dir`` attribute is derived from the link direction.

    Args:
        documents: The documents to render.
        engine: GraphViz layout engine to use. `None` uses the default.
        node_color: Default node color.
        node_colors: Dictionary specifying colors of specific nodes, keyed by
            document ID. Useful for emphasizing nodes that were selected by
            MMR, or differ from other results. An entry takes precedence over
            ``node_color`` even when its value is `None`.
        skip_tags: Set of tags to skip when rendering the graph. Specified as
            tuples containing the kind and tag.

    Returns:
        The "graphviz.Digraph" representing the nodes. May be printed to source,
        or rendered using `dot`.

    Raises:
        ImportError: If the ``graphviz`` Python package is not installed.
        ValueError: If a document has no ID.

    Note:
        To render the generated DOT source code, you also need to install Graphviz
        (`download page <https://www.graphviz.org/download/>`_,
        `archived versions <https://www2.graphviz.org/Archive/stable/>`_,
        `installation procedure for Windows <https://forum.graphviz.org/t/new-simplified-installation-procedure-on-windows/224>`_).
    """
    try:
        import graphviz
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so one clause
        # covers both; chaining keeps the original failure visible.
        raise ImportError(
            "Could not import graphviz python package. "
            "Please install it with `pip install graphviz`."
        ) from e

    colors_by_id = {} if node_colors is None else node_colors
    # Materialize once so membership checks are cheap and a one-shot iterable
    # is not consumed by the first lookup.
    skipped = set(skip_tags)

    graph = graphviz.Digraph(engine=engine)
    graph.attr(rankdir="LR")
    graph.attr("node", style="filled")

    # Tag nodes are created on first use and numbered in order of appearance.
    tag_ids: Dict[Tuple[str, str], str] = {}

    for document in documents:
        doc_id = document.id
        if doc_id is None:
            raise ValueError(f"Illegal graph document without ID: {document}")
        escaped_id = _escape_id(doc_id)

        # Explicit membership test instead of ``.get()``: an entry whose value
        # is None must win over ``node_color`` rather than fall back to it.
        color = colors_by_id[doc_id] if doc_id in colors_by_id else node_color

        # The label shows the ID on the first line and a shortened content
        # preview below it; the full content goes into the tooltip.
        node_label = "\n".join(
            [
                graphviz.escape(doc_id),
                graphviz.escape(_split_prefix(document.page_content)),
            ]
        )
        graph.node(
            escaped_id,
            label=node_label,
            shape="note",
            fillcolor=color,
            tooltip=graphviz.escape(document.page_content),
        )

        for link in get_links(document):
            tag_key = (link.kind, link.tag)
            if tag_key in skipped:
                continue

            tag_id = tag_ids.get(tag_key)
            if tag_id is None:
                tag_id = f"tag_{len(tag_ids)}"
                tag_ids[tag_key] = tag_id
                graph.node(tag_id, label=graphviz.escape(f"{link.kind}:{link.tag}"))

            graph.edge(escaped_id, tag_id, dir=_EDGE_DIRECTION[link.direction])
    return graph
|
@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.graph_vectorstores.links import METADATA_LINKS_KEY, Link
|
||||
from langchain_community.graph_vectorstores.visualize import render_graphviz
|
||||
|
||||
|
||||
@pytest.mark.requires("graphviz")
def test_visualize_simple_graph() -> None:
    """Check the DOT source rendered for a small two-document graph.

    Covers the default rendering, the ``engine`` argument, per-node colors,
    a default color overridden by a ``None`` entry, and ``skip_tags``.
    """
    first = Document(
        id="a",
        page_content="some content",
        metadata={
            METADATA_LINKS_KEY: [
                Link.incoming("href", "a"),
                Link.bidir("kw", "foo"),
            ]
        },
    )
    second = Document(
        id="b",
        page_content="<some\n more content>",
        metadata={
            METADATA_LINKS_KEY: [
                Link.incoming("href", "b"),
                Link.outgoing("href", "a"),
                Link.bidir("kw", "foo"),
                Link.bidir("kw", "bar"),
            ]
        },
    )
    docs = [first, second]

    # Fragments shared by several of the expected DOT sources below.
    header = "digraph {\n" "\trankdir=LR\n" "\tnode [style=filled]\n"
    footer = "}\n"
    a_plain = '\ta [label="a\nsome content" shape=note tooltip="some content"]\n'
    a_gold = (
        '\ta [label="a\nsome content" fillcolor=gold '
        'shape=note tooltip="some content"]\n'
    )
    b_plain = (
        '\tb [label="b\n<some\n more content>" '
        'shape=note tooltip="<some\n more content>"]\n'
    )
    b_gold = (
        '\tb [label="b\n<some\n more content>" fillcolor=gold '
        'shape=note tooltip="<some\n more content>"]\n'
    )
    a_links = (
        '\ttag_0 [label="href:a"]\n'
        "\ta -> tag_0 [dir=back]\n"
        '\ttag_1 [label="kw:foo"]\n'
        "\ta -> tag_1 [dir=both]\n"
    )
    b_links = (
        '\ttag_2 [label="href:b"]\n'
        "\tb -> tag_2 [dir=back]\n"
        "\tb -> tag_0 [dir=forward]\n"
        "\tb -> tag_1 [dir=both]\n"
        '\ttag_3 [label="kw:bar"]\n'
        "\tb -> tag_3 [dir=both]\n"
    )

    # Default rendering: no fill colors, every tag present.
    expected_default = header + a_plain + a_links + b_plain + b_links + footer
    assert render_graphviz(docs).source == expected_default

    # The layout engine is passed through to the resulting graph.
    assert render_graphviz(docs, engine="fdp").engine == "fdp"

    # A per-node color applies to that node only.
    expected_a_gold = header + a_gold + a_links + b_plain + b_links + footer
    assert render_graphviz(docs, node_colors={"a": "gold"}).source == expected_a_gold

    # A ``None`` entry in node_colors overrides the default color.
    expected_b_gold = header + a_plain + a_links + b_gold + b_links + footer
    rendered = render_graphviz(docs, node_color="gold", node_colors={"a": None})
    assert rendered.source == expected_b_gold

    # Skipped tags are omitted, and the remaining tags are renumbered.
    expected_skipped = (
        header
        + a_plain
        + '\ttag_0 [label="href:a"]\n'
        + "\ta -> tag_0 [dir=back]\n"
        + b_plain
        + '\ttag_1 [label="href:b"]\n'
        + "\tb -> tag_1 [dir=back]\n"
        + "\tb -> tag_0 [dir=forward]\n"
        + '\ttag_2 [label="kw:bar"]\n'
        + "\tb -> tag_2 [dir=both]\n"
        + footer
    )
    assert render_graphviz(docs, skip_tags=[("kw", "foo")]).source == expected_skipped
|
Loading…
Reference in New Issue
Block a user