update llm graph transformer documentation (#27905)

2026-07-15 07:00:38 +00:00 · 2024-11-05 23:54:26 +07:00
parent 31f4fb790d
commit a3bbbe6a86
3 changed files with 110 additions and 10 deletions
--- a/docs/docs/how_to/graph_constructing.ipynb
+++ b/docs/docs/how_to/graph_constructing.ipynb
@@ -44,6 +44,9 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
@@ -105,7 +108,7 @@
    "os.environ[\"NEO4J_USERNAME\"] = \"neo4j\"\n",
    "os.environ[\"NEO4J_PASSWORD\"] = \"password\"\n",
    "\n",
-    "graph = Neo4jGraph()"
+    "graph = Neo4jGraph(refresh_schema=False)"
   ]
  },
  {
@@ -149,8 +152,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
-      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='MARRIED'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='PROFESSOR')]\n"
+      "Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
+      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='MARRIED', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={})]\n"
     ]
    }
   ],
@@ -191,8 +194,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
-      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='WORKED_AT')]\n"
+      "Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
+      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
     ]
    }
   ],
@@ -209,6 +212,44 @@
    "print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To define the graph schema more precisely, consider using a three-tuple approach for relationships. In this approach, each tuple consists of three elements: the source node, the relationship type, and the target node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Nodes:[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})]\n",
+      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
+     ]
+    }
+   ],
+   "source": [
+    "allowed_relationships = [\n",
+    "    (\"Person\", \"SPOUSE\", \"Person\"),\n",
+    "    (\"Person\", \"NATIONALITY\", \"Country\"),\n",
+    "    (\"Person\", \"WORKED_AT\", \"Organization\"),\n",
+    "]\n",
+    "\n",
+    "llm_transformer_tuple = LLMGraphTransformer(\n",
+    "    llm=llm,\n",
+    "    allowed_nodes=[\"Person\", \"Country\", \"Organization\"],\n",
+    "    allowed_relationships=allowed_relationships,\n",
+    ")\n",
+    "llm_transformer_tuple = llm_transformer_filtered.convert_to_graph_documents(documents)\n",
+    "print(f\"Nodes:{graph_documents_filtered[0].nodes}\")\n",
+    "print(f\"Relationships:{graph_documents_filtered[0].relationships}\")"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -229,15 +270,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Nodes:[Node(id='Marie Curie', type='Person', properties={'born_year': '1867'}), Node(id='Pierre Curie', type='Person'), Node(id='University Of Paris', type='Organization')]\n",
-      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='WORKED_AT')]\n"
+      "Nodes:[Node(id='Marie Curie', type='Person', properties={'born_year': '1867'}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Poland', type='Country', properties={}), Node(id='France', type='Country', properties={})]\n",
+      "Relationships:[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Poland', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='France', type='Country', properties={}), type='NATIONALITY', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='WORKED_AT', properties={})]\n"
     ]
    }
   ],
@@ -264,12 +305,71 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "graph.add_graph_documents(graph_documents_props)"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Most graph databases support indexes to optimize data import and retrieval. Since we might not know all the node labels in advance, we can handle this by adding a secondary base label to each node using the `baseEntityLabel` parameter."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "graph.add_graph_documents(graph_documents, baseEntityLabel=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Results will look like:\n",
+    "\n",
+    "![graph_construction3.png](../../static/img/graph_construction3.png)\n",
+    "\n",
+    "The final option is to also import the source documents for the extracted nodes and relationships. This approach lets us track which documents each entity appeared in."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "graph.add_graph_documents(graph_documents, include_source=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Graph will have the following structure:\n",
+    "\n",
+    "![graph_construction4.png](../../static/img/graph_construction4.png)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this visualization, the source document is highlighted in blue, with all entities extracted from it connected by `MENTIONS` relationships."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@@ -288,7 +388,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.1"
+   "version": "3.11.5"
  }
 },
 "nbformat": 4,
--- a/docs/static/img/graph_construction3.png
+++ b/docs/static/img/graph_construction3.png
--- a/docs/static/img/graph_construction4.png
+++ b/docs/static/img/graph_construction4.png