diff --git a/libs/community/langchain_community/graph_vectorstores/extractors/keybert_link_extractor.py b/libs/community/langchain_community/graph_vectorstores/extractors/keybert_link_extractor.py
index 54be34d4631..90a208b9878 100644
--- a/libs/community/langchain_community/graph_vectorstores/extractors/keybert_link_extractor.py
+++ b/libs/community/langchain_community/graph_vectorstores/extractors/keybert_link_extractor.py
@@ -20,22 +20,114 @@ class KeybertLinkExtractor(LinkExtractor[KeybertInput]):
embedding_model: str = "all-MiniLM-L6-v2",
extract_keywords_kwargs: Optional[Dict[str, Any]] = None,
):
- """Extract keywords using KeyBERT .
+ """Extract keywords using `KeyBERT <https://github.com/MaartenGr/KeyBERT>`_.
- Example:
+ KeyBERT is a minimal and easy-to-use keyword extraction technique that
+ leverages BERT embeddings to create keywords and keyphrases that are most
+ similar to a document.
- .. code-block:: python
+ The KeybertLinkExtractor uses KeyBERT to create links between documents that
+ have keywords in common.
- extractor = KeybertLinkExtractor()
+ Example::
- results = extractor.extract_one(PAGE_1)
+ extractor = KeybertLinkExtractor()
+ results = extractor.extract_one("lorem ipsum...")
+
+ .. seealso::
+
+ - :mod:`How to use a graph vector store <langchain_community.graph_vectorstores>`
+ - :class:`How to create links between documents <langchain_core.graph_vectorstores.links.Link>`
+
+ How to link Documents on common keywords using KeyBERT
+ ======================================================
+
+ Preliminaries
+ -------------
+
+ Install the keybert package:
+
+ .. code-block:: bash
+
+ pip install -q langchain_community keybert
+
+ Usage
+ -----
+
+ We load the ``state_of_the_union.txt`` file, chunk it, then for each chunk we
+ extract keyword links and add them to the chunk.
+
+ Using extract_one()
+ ^^^^^^^^^^^^^^^^^^^
+
+ We can use :meth:`extract_one` on a document to get the links and add the links
+ to the document metadata with
+ :func:`~langchain_core.graph_vectorstores.links.add_links`::
+
+ from langchain_community.document_loaders import TextLoader
+ from langchain_community.graph_vectorstores import CassandraGraphVectorStore
+ from langchain_community.graph_vectorstores.extractors import KeybertLinkExtractor
+ from langchain_core.graph_vectorstores.links import add_links
+ from langchain_text_splitters import CharacterTextSplitter
+
+ loader = TextLoader("state_of_the_union.txt")
+
+ raw_documents = loader.load()
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+
+ documents = text_splitter.split_documents(raw_documents)
+ keyword_extractor = KeybertLinkExtractor()
+
+ for document in documents:
+ links = keyword_extractor.extract_one(document)
+ add_links(document, links)
+
+ print(documents[0].metadata)
+
+ .. code-block:: output
+
+ {'source': 'state_of_the_union.txt', 'links': [Link(kind='kw', direction='bidir', tag='ukraine'), Link(kind='kw', direction='bidir', tag='ukrainian'), Link(kind='kw', direction='bidir', tag='putin'), Link(kind='kw', direction='bidir', tag='vladimir'), Link(kind='kw', direction='bidir', tag='russia')]}
+
+ Using LinkExtractorTransformer
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Using the :class:`~langchain_community.graph_vectorstores.extractors.link_extractor_transformer.LinkExtractorTransformer`,
+ we can simplify the link extraction::
+
+ from langchain_community.document_loaders import TextLoader
+ from langchain_community.graph_vectorstores.extractors import (
+ KeybertLinkExtractor,
+ LinkExtractorTransformer,
+ )
+ from langchain_text_splitters import CharacterTextSplitter
+
+ loader = TextLoader("state_of_the_union.txt")
+ raw_documents = loader.load()
+
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+ documents = text_splitter.split_documents(raw_documents)
+
+ transformer = LinkExtractorTransformer([KeybertLinkExtractor()])
+ documents = transformer.transform_documents(documents)
+
+ print(documents[0].metadata)
+
+ .. code-block:: output
+
+ {'source': 'state_of_the_union.txt', 'links': [Link(kind='kw', direction='bidir', tag='ukraine'), Link(kind='kw', direction='bidir', tag='ukrainian'), Link(kind='kw', direction='bidir', tag='putin'), Link(kind='kw', direction='bidir', tag='vladimir'), Link(kind='kw', direction='bidir', tag='russia')]}
+
+ The documents with keyword links can then be added to a :class:`~langchain_core.graph_vectorstores.base.GraphVectorStore`::
+
+ from langchain_community.graph_vectorstores import CassandraGraphVectorStore
+
+ store = CassandraGraphVectorStore.from_documents(documents=documents, embedding=...)
Args:
kind: Kind of links to produce with this extractor.
embedding_model: Name of the embedding model to use with KeyBERT.
extract_keywords_kwargs: Keyword arguments to pass to KeyBERT's
- `extract_keywords` method.
- """
+ ``extract_keywords`` method.
+ """ # noqa: E501
try:
import keybert