community: Add docstring for GLiNERLinkExtractor (#26218)

Co-authored-by: Erick Friis <erick@langchain.dev>
2025-08-15 23:57:21 +00:00 · 2024-09-10 02:27:23 +02:00 · 2024-09-10 02:27:23 +02:00 · 56580b5fff
commit 56580b5fff
parent e235a572a0
1 changed files with 111 additions and 18 deletions
--- a/libs/community/langchain_community/graph_vectorstores/extractors/gliner_link_extractor.py
+++ b/libs/community/langchain_community/graph_vectorstores/extractors/gliner_link_extractor.py
@@ -14,7 +14,117 @@ GLiNERInput = Union[str, Document]

@beta()
 class GLiNERLinkExtractor(LinkExtractor[GLiNERInput]):
-    """Link documents with common named entities using GLiNER <https://github.com/urchade/GLiNER>."""
+    """Link documents with common named entities using `GLiNER`_.
+
+    `GLiNER`_ is a Named Entity Recognition (NER) model capable of identifying any
+    entity type using a bidirectional transformer encoder (BERT-like).
+
+    The ``GLiNERLinkExtractor`` uses GLiNER to create links between documents that
+    have named entities in common.
+
+    Example::
+
+        extractor = GLiNERLinkExtractor(
+            labels=["Person", "Award", "Date", "Competitions", "Teams"]
+        )
+        results = extractor.extract_one("some long text...")
+
+    .. _GLiNER: https://github.com/urchade/GLiNER
+
+    .. seealso::
+
+            - :mod:`How to use a graph vector store <langchain_community.graph_vectorstores>`
+            - :class:`How to create links between documents <langchain_core.graph_vectorstores.links.Link>`
+
+    How to link Documents on common named entities
+    ==============================================
+
+    Preliminaries
+    -------------
+
+    Install the ``gliner`` package:
+
+    .. code-block:: bash
+
+        pip install -q langchain_community gliner
+
+    Usage
+    -----
+
+    We load the ``state_of_the_union.txt`` file, chunk it, then for each chunk we
+    extract named entity links and add them to the chunk.
+
+    Using extract_one()
+    ^^^^^^^^^^^^^^^^^^^
+
+    We can use :meth:`extract_one` on a document to get the links and add the links
+    to the document metadata with
+    :func:`~langchain_core.graph_vectorstores.links.add_links`::
+
+        from langchain_community.document_loaders import TextLoader
+        from langchain_community.graph_vectorstores import CassandraGraphVectorStore
+        from langchain_community.graph_vectorstores.extractors import GLiNERLinkExtractor
+        from langchain_core.graph_vectorstores.links import add_links
+        from langchain_text_splitters import CharacterTextSplitter
+
+        loader = TextLoader("state_of_the_union.txt")
+        raw_documents = loader.load()
+
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        documents = text_splitter.split_documents(raw_documents)
+
+        ner_extractor = GLiNERLinkExtractor(["Person", "Topic"])
+        for document in documents:
+            links = ner_extractor.extract_one(document)
+            add_links(document, links)
+
+        print(documents[0].metadata)
+
+    .. code-block:: output
+
+        {'source': 'state_of_the_union.txt', 'links': [Link(kind='entity:Person', direction='bidir', tag='President Zelenskyy'), Link(kind='entity:Person', direction='bidir', tag='Vladimir Putin')]}
+
+    Using LinkExtractorTransformer
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    Using the :class:`~langchain_community.graph_vectorstores.extractors.link_extractor_transformer.LinkExtractorTransformer`,
+    we can simplify the link extraction::
+
+        from langchain_community.document_loaders import TextLoader
+        from langchain_community.graph_vectorstores.extractors import (
+            GLiNERLinkExtractor,
+            LinkExtractorTransformer,
+        )
+        from langchain_text_splitters import CharacterTextSplitter
+
+        loader = TextLoader("state_of_the_union.txt")
+        raw_documents = loader.load()
+
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        documents = text_splitter.split_documents(raw_documents)
+
+        ner_extractor = GLiNERLinkExtractor(["Person", "Topic"])
+        transformer = LinkExtractorTransformer([ner_extractor])
+        documents = transformer.transform_documents(documents)
+
+        print(documents[0].metadata)
+
+    .. code-block:: output
+
+        {'source': 'state_of_the_union.txt', 'links': [Link(kind='entity:Person', direction='bidir', tag='President Zelenskyy'), Link(kind='entity:Person', direction='bidir', tag='Vladimir Putin')]}
+
+    The documents with named entity links can then be added to a :class:`~langchain_core.graph_vectorstores.base.GraphVectorStore`::
+
+        from langchain_community.graph_vectorstores import CassandraGraphVectorStore
+
+        store = CassandraGraphVectorStore.from_documents(documents=documents, embedding=...)
+
+    Args:
+        labels: List of kinds of entities to extract.
+        kind: Kind of links to produce with this extractor.
+        model: GLiNER model to use.
+        extract_kwargs: Keyword arguments to pass to GLiNER.
+    """  # noqa: E501

    def __init__(
        self,
@@ -24,23 +134,6 @@ class GLiNERLinkExtractor(LinkExtractor[GLiNERInput]):
        model: str = "urchade/gliner_mediumv2.1",
        extract_kwargs: Optional[Dict[str, Any]] = None,
    ):
-        """Extract keywords using GLiNER.
-
-        Example:
-
-            .. code-block:: python
-
-                extractor = GLiNERLinkExtractor(
-                    labels=["Person", "Award", "Date", "Competitions", "Teams"]
-                )
-                results = extractor.extract_one("some long text...")
-
-        Args:
-            labels: List of kinds of entities to extract.
-            kind: Kind of links to produce with this extractor.
-            model: GLiNER model to use.
-            extract_kwargs: Keyword arguments to pass to GLiNER.
-        """
        try:
            from gliner import GLiNER