community[patch]: Neo4j enhanced schema (#20983)

Scan the database for example values and provide them to an LLM for better inference of Text2cypher
2025-09-27 06:18:05 +00:00 · 2024-04-29 16:45:55 +02:00
parent dc70c23a11
commit 67428c4052
3 changed files with 423 additions and 92 deletions
--- a/docs/docs/integrations/graphs/neo4j_cypher.ipynb
+++ b/docs/docs/integrations/graphs/neo4j_cypher.ipynb
@@ -21,7 +21,7 @@
   "id": "dbc0ee68",
   "metadata": {},
   "source": [
-    "## Settin up\n",
+    "## Setting up\n",
    "\n",
    "You will need to have a running `Neo4j` instance. One option is to create a [free Neo4j database instance in their Aura cloud service](https://neo4j.com/cloud/platform/aura-graph-database/). You can also run the database locally using the [Neo4j Desktop application](https://neo4j.com/download/), or running a docker container.\n",
    "You can run a local docker container by running the executing the following script:\n",
@@ -31,7 +31,7 @@
    "    --name neo4j \\\n",
    "    -p 7474:7474 -p 7687:7687 \\\n",
    "    -d \\\n",
-    "    -e NEO4J_AUTH=neo4j/pleaseletmein \\\n",
+    "    -e NEO4J_AUTH=neo4j/password \\\n",
    "    -e NEO4J_PLUGINS=\\[\\\"apoc\\\"\\]  \\\n",
    "    neo4j:latest\n",
    "```\n",
@@ -58,9 +58,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "graph = Neo4jGraph(\n",
-    "    url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
-    ")"
+    "graph = Neo4jGraph(url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"password\")"
   ]
  },
  {
@@ -93,7 +91,7 @@
   "source": [
    "graph.query(\n",
    "    \"\"\"\n",
-    "MERGE (m:Movie {name:\"Top Gun\"})\n",
+    "MERGE (m:Movie {name:\"Top Gun\", runtime: 120})\n",
    "WITH m\n",
    "UNWIND [\"Tom Cruise\", \"Val Kilmer\", \"Anthony Edwards\", \"Meg Ryan\"] AS actor\n",
    "MERGE (a:Actor {name:actor})\n",
@@ -131,11 +129,12 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Node properties are the following:\n",
-      "Movie {name: STRING},Actor {name: STRING}\n",
-      "Relationship properties are the following:\n",
+      "Node properties:\n",
+      "Movie {runtime: INTEGER, name: STRING}\n",
+      "Actor {name: STRING}\n",
+      "Relationship properties:\n",
      "\n",
-      "The relationships are the following:\n",
+      "The relationships:\n",
      "(:Actor)-[:ACTED_IN]->(:Movie)\n"
     ]
    }
@@ -144,6 +143,48 @@
    "print(graph.schema)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "3d88f516-2e60-4da4-b25f-dad5801fe133",
+   "metadata": {},
+   "source": [
+    "## Enhanced schema information\n",
+    "Choosing the enhanced schema version enables the system to automatically scan the database for example values and calculate some distribution metrics. For example, if a node property has 10 or fewer distinct values, all of its values are returned in the schema. Otherwise, only a single example value is returned per node and relationship property."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c8233976-1ca7-4f8f-af20-e8fb3e081fdd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Node properties:\n",
+      "- **Movie**\n",
+      "  - `runtime: INTEGER` Min: 120, Max: 120\n",
+      "  - `name: STRING` Available options: ['Top Gun']\n",
+      "- **Actor**\n",
+      "  - `name: STRING` Available options: ['Tom Cruise', 'Val Kilmer', 'Anthony Edwards', 'Meg Ryan']\n",
+      "Relationship properties:\n",
+      "\n",
+      "The relationships:\n",
+      "(:Actor)-[:ACTED_IN]->(:Movie)\n"
+     ]
+    }
+   ],
+   "source": [
+    "enhanced_graph = Neo4jGraph(\n",
+    "    url=\"bolt://localhost:7687\",\n",
+    "    username=\"neo4j\",\n",
+    "    password=\"password\",\n",
+    "    enhanced_schema=True,\n",
+    ")\n",
+    "print(enhanced_graph.schema)"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "68a3c677",
@@ -156,7 +197,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
   "id": "7476ce98",
   "metadata": {},
   "outputs": [],
@@ -168,7 +209,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "id": "ef8ee27b",
   "metadata": {},
   "outputs": [
@@ -180,10 +221,11 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -191,16 +233,17 @@
    {
     "data": {
      "text/plain": [
-       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
+       "{'query': 'Who played in Top Gun?',\n",
+       " 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.'}"
      ]
     },
-     "execution_count": 7,
+     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"Who played in Top Gun?\")"
+    "chain.invoke({\"query\": \"Who played in Top Gun?\"})"
   ]
  },
  {
@@ -215,7 +258,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "id": "df230946",
   "metadata": {},
   "outputs": [],
@@ -227,7 +270,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "id": "3f1600ee",
   "metadata": {},
   "outputs": [
@@ -239,10 +282,11 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -250,16 +294,17 @@
    {
     "data": {
      "text/plain": [
-       "'Tom Cruise and Val Kilmer played in Top Gun.'"
+       "{'query': 'Who played in Top Gun?',\n",
+       " 'result': 'Anthony Edwards, Meg Ryan played in Top Gun.'}"
      ]
     },
-     "execution_count": 9,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"Who played in Top Gun?\")"
+    "chain.invoke({\"query\": \"Who played in Top Gun?\"})"
   ]
  },
  {
@@ -273,7 +318,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
   "id": "e412f36b",
   "metadata": {},
   "outputs": [],
@@ -285,7 +330,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
   "id": "4f4699dc",
   "metadata": {},
   "outputs": [
@@ -297,19 +342,20 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
-      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]}]\n",
-      "Final answer: Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.\n"
+      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\\nWHERE m.name = 'Top Gun'\\nRETURN a.name\"}, {'context': [{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]}]\n",
+      "Final answer: Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.\n"
     ]
    }
   ],
   "source": [
-    "result = chain(\"Who played in Top Gun?\")\n",
+    "result = chain.invoke({\"query\": \"Who played in Top Gun?\"})\n",
    "print(f\"Intermediate steps: {result['intermediate_steps']}\")\n",
    "print(f\"Final answer: {result['result']}\")"
   ]
@@ -325,7 +371,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
   "id": "2d3acf10",
   "metadata": {},
   "outputs": [],
@@ -337,7 +383,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
   "id": "b0a9d143",
   "metadata": {},
   "outputs": [
@@ -349,7 +395,8 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -358,19 +405,20 @@
    {
     "data": {
      "text/plain": [
-       "[{'a.name': 'Tom Cruise'},\n",
-       " {'a.name': 'Val Kilmer'},\n",
-       " {'a.name': 'Anthony Edwards'},\n",
-       " {'a.name': 'Meg Ryan'}]"
+       "{'query': 'Who played in Top Gun?',\n",
+       " 'result': [{'a.name': 'Anthony Edwards'},\n",
+       "  {'a.name': 'Meg Ryan'},\n",
+       "  {'a.name': 'Val Kilmer'},\n",
+       "  {'a.name': 'Tom Cruise'}]}"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"Who played in Top Gun?\")"
+    "chain.invoke({\"query\": \"Who played in Top Gun?\"})"
   ]
  },
  {
@@ -384,7 +432,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
   "id": "59baeb88-adfa-4c26-8334-fcbff3a98efb",
   "metadata": {},
   "outputs": [],
@@ -422,7 +470,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
   "id": "47c64027-cf42-493a-9c76-2d10ba753728",
   "metadata": {},
   "outputs": [
@@ -434,7 +482,7 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (m:Movie {name:\"Top Gun\"})<-[:ACTED_IN]-(:Actor)\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (:Movie {name:\"Top Gun\"})<-[:ACTED_IN]-()\n",
      "RETURN count(*) AS numberOfActors\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m[{'numberOfActors': 4}]\u001b[0m\n",
@@ -445,16 +493,17 @@
    {
     "data": {
      "text/plain": [
-       "'Four people played in Top Gun.'"
+       "{'query': 'How many people played in Top Gun?',\n",
+       " 'result': 'There were 4 actors who played in Top Gun.'}"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"How many people played in Top Gun?\")"
+    "chain.invoke({\"query\": \"How many people played in Top Gun?\"})"
   ]
  },
  {
@@ -468,7 +517,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
   "id": "6f9becc2-f579-45bf-9b50-2ce02bde92da",
   "metadata": {},
   "outputs": [],
@@ -483,7 +532,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
   "id": "ff18e3e3-3402-4683-aec4-a19898f23ca1",
   "metadata": {},
   "outputs": [
@@ -495,10 +544,11 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -506,16 +556,17 @@
    {
     "data": {
      "text/plain": [
-       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
+       "{'query': 'Who played in Top Gun?',\n",
+       " 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, and Tom Cruise played in Top Gun.'}"
      ]
     },
-     "execution_count": 17,
+     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"Who played in Top Gun?\")"
+    "chain.invoke({\"query\": \"Who played in Top Gun?\"})"
   ]
  },
  {
@@ -530,7 +581,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 19,
   "id": "a20fa21e-fb85-41c4-aac0-53fb25e34604",
   "metadata": {},
   "outputs": [],
@@ -546,7 +597,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 20,
   "id": "3ad7f6b8-543e-46e4-a3b2-40fa3e66e895",
   "metadata": {},
   "outputs": [
@@ -579,7 +630,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 21,
   "id": "53665d03-7afd-433c-bdd5-750127bfb152",
   "metadata": {},
   "outputs": [],
@@ -594,7 +645,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 22,
   "id": "19e1a591-9c10-4d7b-aa36-a5e1b778a97b",
   "metadata": {},
   "outputs": [
@@ -606,10 +657,11 @@
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
-      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
+      "WHERE m.name = 'Top Gun'\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -617,16 +669,17 @@
    {
     "data": {
      "text/plain": [
-       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
+       "{'query': 'Who played in Top Gun?',\n",
+       " 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.'}"
      ]
     },
-     "execution_count": 21,
+     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chain.run(\"Who played in Top Gun?\")"
+    "chain.invoke({\"query\": \"Who played in Top Gun?\"})"
   ]
  },
  {
@@ -654,7 +707,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.9.18"
  }
 },
 "nbformat": 4,
--- a/libs/community/langchain_community/graphs/neo4j_graph.py
+++ b/libs/community/langchain_community/graphs/neo4j_graph.py
@@ -9,6 +9,11 @@ from langchain_community.graphs.graph_store import GraphStore
 BASE_ENTITY_LABEL = "__Entity__"
 EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"]
 EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"]
+EXHAUSTIVE_SEARCH_LIMIT = 10000
+LIST_LIMIT = 128
+# Threshold for returning all available prop values in graph schema
+DISTINCT_VALUE_LIMIT = 10
+NL = "\n"

 node_properties_query = """
 CALL apoc.meta.data()
@@ -56,7 +61,6 @@ def value_sanitize(d: Any) -> Any:
    results, can occupy significant context space and detract from
    the LLM's performance by introducing unnecessary noise and cost.
    """
-    LIST_LIMIT = 128
    if isinstance(d, dict):
        new_dict = {}
        for key, value in d.items():
@@ -135,6 +139,223 @@ def _get_rel_import_query(baseEntityLabel: bool) -> str:
        )


+def _enhanced_schema_cypher(
+    label_or_type: str,
+    properties: List[Dict[str, Any]],
+    exhaustive: bool,
+    is_relationship: bool = False,
+) -> str:
+    if is_relationship:
+        match_clause = f"MATCH ()-[n:{label_or_type}]->()"
+    else:
+        match_clause = f"MATCH (n:{label_or_type})"
+
+    with_clauses = []
+    return_clauses = []
+    output_dict = {}
+    if exhaustive:
+        for prop in properties:
+            prop_name = prop["property"]
+            prop_type = prop["type"]
+            if prop_type == "STRING":
+                with_clauses.append(
+                    (
+                        f"collect(distinct substring(n.`{prop_name}`, 0, 50)) "
+                        f"AS `{prop_name}_values`"
+                    )
+                )
+                return_clauses.append(
+                    (
+                        f"values:`{prop_name}_values`[..{DISTINCT_VALUE_LIMIT}],"
+                        f" distinct_count: size(`{prop_name}_values`)"
+                    )
+                )
+            elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
+                with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`")
+                with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`")
+                with_clauses.append(
+                    f"count(distinct n.`{prop_name}`) AS `{prop_name}_distinct`"
+                )
+                return_clauses.append(
+                    (
+                        f"min: toString(`{prop_name}_min`), "
+                        f"max: toString(`{prop_name}_max`), "
+                        f"distinct_count: `{prop_name}_distinct`"
+                    )
+                )
+            elif prop_type == "LIST":
+                with_clauses.append(
+                    (
+                        f"min(size(n.`{prop_name}`)) AS `{prop_name}_size_min`, "
+                        f"max(size(n.`{prop_name}`)) AS `{prop_name}_size_max`"
+                    )
+                )
+                return_clauses.append(
+                    f"min_size: `{prop_name}_size_min`, "
+                    f"max_size: `{prop_name}_size_max`"
+                )
+
+            output_dict[prop_name] = "{" + return_clauses.pop() + "}"
+    else:
+        # Just sample 5 random nodes
+        match_clause += " WITH n LIMIT 5"
+        for prop in properties:
+            prop_name = prop["property"]
+            prop_type = prop["type"]
+            if prop_type == "STRING":
+                with_clauses.append(
+                    (
+                        f"collect(distinct substring(n.`{prop_name}`, 0, 50)) "
+                        f"AS `{prop_name}_values`"
+                    )
+                )
+                return_clauses.append(f"values: `{prop_name}_values`")
+            elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
+                with_clauses.append(
+                    f"collect(distinct toString(n.`{prop_name}`)) "
+                    f"AS `{prop_name}_values`"
+                )
+                return_clauses.append(f"values: `{prop_name}_values`")
+            elif prop_type == "LIST":
+                with_clauses.append(
+                    (
+                        f"min(size(n.`{prop_name}`)) AS `{prop_name}_size_min`, "
+                        f"max(size(n.`{prop_name}`)) AS `{prop_name}_size_max`"
+                    )
+                )
+                return_clauses.append(
+                    f"min_size: `{prop_name}_size_min`,max_size: `{prop_name}_size_max`"
+                )
+
+            output_dict[prop_name] = "{" + return_clauses.pop() + "}"
+
+    with_clause = "WITH " + ",\n     ".join(with_clauses)
+    return_clause = (
+        "RETURN {"
+        + ", ".join(f"{k}: {v}" for k, v in output_dict.items())
+        + "} AS output"
+    )
+
+    # Combine all parts of the Cypher query
+    cypher_query = "\n".join([match_clause, with_clause, return_clause])
+    return cypher_query
+
+
+def _format_schema(schema: Dict, is_enhanced: bool) -> str:
+    formatted_node_props = []
+    formatted_rel_props = []
+    if is_enhanced:
+        # Enhanced formatting for nodes
+        for node_type, properties in schema["node_props"].items():
+            formatted_node_props.append(f"- **{node_type}**")
+            for prop in properties:
+                example = ""
+                if prop["type"] == "STRING":
+                    if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
+                        example = (
+                            f'Example: "{prop["values"][0].replace(NL, " ")}"'
+                            if prop["values"]
+                            else ""
+                        )
+                    else:  # If less than 10 possible values return all
+                        example = (
+                            (
+                                "Available options: "
+                                f'{[el.replace(NL, " ") for el in prop["values"]]}'
+                            )
+                            if prop["values"]
+                            else ""
+                        )
+
+                elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
+                    if prop.get("min") is not None:
+                        example = f'Min: {prop["min"]}, Max: {prop["max"]}'
+                    else:
+                        example = (
+                            f'Example: "{prop["values"][0]}"' if prop["values"] else ""
+                        )
+                elif prop["type"] == "LIST":
+                    # Skip embeddings
+                    if prop["min_size"] > LIST_LIMIT:
+                        continue
+                    example = (
+                        f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
+                    )
+                formatted_node_props.append(
+                    f"  - `{prop['property']}: {prop['type']}` {example}"
+                )
+
+        # Enhanced formatting for relationships
+        for rel_type, properties in schema["rel_props"].items():
+            formatted_rel_props.append(f"- **{rel_type}**")
+            for prop in properties:
+                example = ""
+                if prop["type"] == "STRING":
+                    if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
+                        example = (
+                            f'Example: "{prop["values"][0].replace(NL, " ")}"'
+                            if prop["values"]
+                            else ""
+                        )
+                    else:  # If less than 10 possible values return all
+                        example = (
+                            (
+                                "Available options: "
+                                f'{[el.replace(NL, " ") for el in prop["values"]]}'
+                            )
+                            if prop["values"]
+                            else ""
+                        )
+                elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
+                    if prop.get("min"):  # If we have min/max
+                        example = f'Min: {prop["min"]}, Max:  {prop["max"]}'
+                    else:  # return a single value
+                        example = (
+                            f'Example: "{prop["values"][0]}"' if prop["values"] else ""
+                        )
+                elif prop["type"] == "LIST":
+                    # Skip embeddings
+                    if prop["min_size"] > LIST_LIMIT:
+                        continue
+                    example = (
+                        f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
+                    )
+                formatted_rel_props.append(
+                    f"  - `{prop['property']}: {prop['type']}` {example}"
+                )
+    else:
+        # Format node properties
+        for label, props in schema["node_props"].items():
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in props]
+            )
+            formatted_node_props.append(f"{label} {{{props_str}}}")
+
+        # Format relationship properties using structured_schema
+        for type, props in schema["rel_props"].items():
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in props]
+            )
+            formatted_rel_props.append(f"{type} {{{props_str}}}")
+
+    # Format relationships
+    formatted_rels = [
+        f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
+        for el in schema["relationships"]
+    ]
+
+    return "\n".join(
+        [
+            "Node properties:",
+            "\n".join(formatted_node_props),
+            "Relationship properties:",
+            "\n".join(formatted_rel_props),
+            "The relationships:",
+            "\n".join(formatted_rels),
+        ]
+    )
+
+
 class Neo4jGraph(GraphStore):
    """Neo4j database wrapper for various graph operations.

@@ -151,6 +372,8 @@ class Neo4jGraph(GraphStore):
            embedding-like properties from database responses. Default is False.
    refresh_schema (bool): A flag whether to refresh schema information
            at initialization. Default is True.
+    enhanced_schema (bool): A flag whether to scan the database for
+            example values and use them in the graph schema. Default is False.
    driver_config (Dict): Configuration passed to Neo4j Driver.

    *Security note*: Make sure that the database connection uses credentials
@@ -176,6 +399,7 @@ class Neo4jGraph(GraphStore):
        refresh_schema: bool = True,
        *,
        driver_config: Optional[Dict] = None,
+        enhanced_schema: bool = False,
    ) -> None:
        """Create a new Neo4j graph wrapper instance."""
        try:
@@ -203,6 +427,7 @@ class Neo4jGraph(GraphStore):
        self._database = database
        self.timeout = timeout
        self.sanitize = sanitize
+        self._enhanced_schema = enhanced_schema
        self.schema: str = ""
        self.structured_schema: Dict[str, Any] = {}
        # Verify connection
@@ -300,37 +525,48 @@ class Neo4jGraph(GraphStore):
            "metadata": {"constraint": constraint, "index": index},
        }

-        # Format node properties
-        formatted_node_props = []
-        for el in node_properties:
-            props_str = ", ".join(
-                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
+        if self._enhanced_schema:
+            schema_counts = self.query(
+                "CALL apoc.meta.graphSample() YIELD nodes, relationships "
+                "RETURN nodes, [rel in relationships | {name:apoc.any.property"
+                "(rel, 'type'), count: apoc.any.property(rel, 'count')}]"
+                " AS relationships"
            )
-            formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
+            # Update node info
+            for node in schema_counts[0]["nodes"]:
+                # Skip bloom labels
+                if node["name"] in EXCLUDED_LABELS:
+                    continue
+                node_props = self.structured_schema["node_props"][node["name"]]
+                enhanced_cypher = _enhanced_schema_cypher(
+                    node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT
+                )
+                enhanced_info = self.query(enhanced_cypher)[0]["output"]
+                for prop in node_props:
+                    if prop["property"] in enhanced_info:
+                        prop.update(enhanced_info[prop["property"]])
+            # Update rel info
+            for rel in schema_counts[0]["relationships"]:
+                # Skip bloom labels
+                if rel["name"] in EXCLUDED_RELS:
+                    continue
+                rel_props = self.structured_schema["rel_props"].get(rel["name"])
+                if not rel_props:
+                    continue
+                enhanced_cypher = _enhanced_schema_cypher(
+                    rel["name"],
+                    rel_props,
+                    rel["count"] < EXHAUSTIVE_SEARCH_LIMIT,
+                    is_relationship=True,
+                )
+                enhanced_info = self.query(enhanced_cypher)[0]["output"]
+                for prop in rel_props:
+                    if prop["property"] in enhanced_info:
+                        prop.update(enhanced_info[prop["property"]])

-        # Format relationship properties
-        formatted_rel_props = []
-        for el in rel_properties:
-            props_str = ", ".join(
-                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
-            )
-            formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
+        schema = _format_schema(self.structured_schema, self._enhanced_schema)

-        # Format relationships
-        formatted_rels = [
-            f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
-        ]
-
-        self.schema = "\n".join(
-            [
-                "Node properties are the following:",
-                ",".join(formatted_node_props),
-                "Relationship properties are the following:",
-                ",".join(formatted_rel_props),
-                "The relationships are the following:",
-                ",".join(formatted_rels),
-            ]
-        )
+        self.schema = schema

    def add_graph_documents(
        self,
--- a/libs/community/tests/integration_tests/graphs/test_neo4j.py
+++ b/libs/community/tests/integration_tests/graphs/test_neo4j.py
@@ -291,3 +291,45 @@ def test_driver_config() -> None:
        driver_config={"max_connection_pool_size": 1},
    )
    graph.query("RETURN 'foo'")
+
+
+def test_enhanced_schema() -> None:
+    """Test that enhanced schema adds example values to the structured schema."""
+    url = os.environ.get("NEO4J_URI")
+    username = os.environ.get("NEO4J_USERNAME")
+    password = os.environ.get("NEO4J_PASSWORD")
+    assert url is not None
+    assert username is not None
+    assert password is not None
+
+    graph = Neo4jGraph(
+        url=url, username=username, password=password, enhanced_schema=True
+    )
+    graph.query("MATCH (n) DETACH DELETE n")
+    graph.add_graph_documents(test_data)
+    graph.refresh_schema()
+    expected_output = {
+        "node_props": {
+            "foo": [
+                {
+                    "property": "id",
+                    "type": "STRING",
+                    "values": ["foo"],
+                    "distinct_count": 1,
+                }
+            ],
+            "bar": [
+                {
+                    "property": "id",
+                    "type": "STRING",
+                    "values": ["bar"],
+                    "distinct_count": 1,
+                }
+            ],
+        },
+        "rel_props": {},
+        "relationships": [{"start": "foo", "type": "REL", "end": "bar"}],
+    }
+    # remove metadata portion of schema
+    del graph.structured_schema["metadata"]
+    assert graph.structured_schema == expected_output