mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 22:29:51 +00:00
community[patch]: Neo4j enhanced schema (#20983)
Scan the database for example values and provide them to an LLM for better inference of Text2cypher
This commit is contained in:
parent
dc70c23a11
commit
67428c4052
@ -21,7 +21,7 @@
|
||||
"id": "dbc0ee68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Settin up\n",
|
||||
"## Setting up\n",
|
||||
"\n",
|
||||
"You will need to have a running `Neo4j` instance. One option is to create a [free Neo4j database instance in their Aura cloud service](https://neo4j.com/cloud/platform/aura-graph-database/). You can also run the database locally using the [Neo4j Desktop application](https://neo4j.com/download/), or running a docker container.\n",
|
||||
"You can run a local docker container by executing the following script:\n",
|
||||
@ -31,7 +31,7 @@
|
||||
" --name neo4j \\\n",
|
||||
" -p 7474:7474 -p 7687:7687 \\\n",
|
||||
" -d \\\n",
|
||||
" -e NEO4J_AUTH=neo4j/pleaseletmein \\\n",
|
||||
" -e NEO4J_AUTH=neo4j/password \\\n",
|
||||
" -e NEO4J_PLUGINS=\\[\\\"apoc\\\"\\] \\\n",
|
||||
" neo4j:latest\n",
|
||||
"```\n",
|
||||
@ -58,9 +58,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = Neo4jGraph(\n",
|
||||
" url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
|
||||
")"
|
||||
"graph = Neo4jGraph(url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"password\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -93,7 +91,7 @@
|
||||
"source": [
|
||||
"graph.query(\n",
|
||||
" \"\"\"\n",
|
||||
"MERGE (m:Movie {name:\"Top Gun\"})\n",
|
||||
"MERGE (m:Movie {name:\"Top Gun\", runtime: 120})\n",
|
||||
"WITH m\n",
|
||||
"UNWIND [\"Tom Cruise\", \"Val Kilmer\", \"Anthony Edwards\", \"Meg Ryan\"] AS actor\n",
|
||||
"MERGE (a:Actor {name:actor})\n",
|
||||
@ -131,11 +129,12 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Node properties are the following:\n",
|
||||
"Movie {name: STRING},Actor {name: STRING}\n",
|
||||
"Relationship properties are the following:\n",
|
||||
"Node properties:\n",
|
||||
"Movie {runtime: INTEGER, name: STRING}\n",
|
||||
"Actor {name: STRING}\n",
|
||||
"Relationship properties:\n",
|
||||
"\n",
|
||||
"The relationships are the following:\n",
|
||||
"The relationships:\n",
|
||||
"(:Actor)-[:ACTED_IN]->(:Movie)\n"
|
||||
]
|
||||
}
|
||||
@ -144,6 +143,48 @@
|
||||
"print(graph.schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3d88f516-2e60-4da4-b25f-dad5801fe133",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Enhanced schema information\n",
|
||||
"Choosing the enhanced schema version enables the system to automatically scan the database for example values and calculate some distribution metrics. For example, if a node property has no more than 10 distinct values, all possible values are returned in the schema. Otherwise, only a single example value is returned per node and relationship property."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c8233976-1ca7-4f8f-af20-e8fb3e081fdd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Node properties:\n",
|
||||
"- **Movie**\n",
|
||||
" - `runtime: INTEGER` Min: 120, Max: 120\n",
|
||||
" - `name: STRING` Available options: ['Top Gun']\n",
|
||||
"- **Actor**\n",
|
||||
" - `name: STRING` Available options: ['Tom Cruise', 'Val Kilmer', 'Anthony Edwards', 'Meg Ryan']\n",
|
||||
"Relationship properties:\n",
|
||||
"\n",
|
||||
"The relationships:\n",
|
||||
"(:Actor)-[:ACTED_IN]->(:Movie)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"enhanced_graph = Neo4jGraph(\n",
|
||||
" url=\"bolt://localhost:7687\",\n",
|
||||
" username=\"neo4j\",\n",
|
||||
" password=\"password\",\n",
|
||||
" enhanced_schema=True,\n",
|
||||
")\n",
|
||||
"print(enhanced_graph.schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68a3c677",
|
||||
@ -156,7 +197,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "7476ce98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -168,7 +209,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "ef8ee27b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -180,10 +221,11 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@ -191,16 +233,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
|
||||
"{'query': 'Who played in Top Gun?',\n",
|
||||
" 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"Who played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -215,7 +258,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "df230946",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -227,7 +270,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"id": "3f1600ee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -239,10 +282,11 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@ -250,16 +294,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tom Cruise and Val Kilmer played in Top Gun.'"
|
||||
"{'query': 'Who played in Top Gun?',\n",
|
||||
" 'result': 'Anthony Edwards, Meg Ryan played in Top Gun.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"Who played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -273,7 +318,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "e412f36b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -285,7 +330,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "4f4699dc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -297,19 +342,20 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]}]\n",
|
||||
"Final answer: Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.\n"
|
||||
"Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\\nWHERE m.name = 'Top Gun'\\nRETURN a.name\"}, {'context': [{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]}]\n",
|
||||
"Final answer: Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain(\"Who played in Top Gun?\")\n",
|
||||
"result = chain.invoke({\"query\": \"Who played in Top Gun?\"})\n",
|
||||
"print(f\"Intermediate steps: {result['intermediate_steps']}\")\n",
|
||||
"print(f\"Final answer: {result['result']}\")"
|
||||
]
|
||||
@ -325,7 +371,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "2d3acf10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -337,7 +383,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 14,
|
||||
"id": "b0a9d143",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -349,7 +395,8 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
@ -358,19 +405,20 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'a.name': 'Tom Cruise'},\n",
|
||||
" {'a.name': 'Val Kilmer'},\n",
|
||||
" {'a.name': 'Anthony Edwards'},\n",
|
||||
" {'a.name': 'Meg Ryan'}]"
|
||||
"{'query': 'Who played in Top Gun?',\n",
|
||||
" 'result': [{'a.name': 'Anthony Edwards'},\n",
|
||||
" {'a.name': 'Meg Ryan'},\n",
|
||||
" {'a.name': 'Val Kilmer'},\n",
|
||||
" {'a.name': 'Tom Cruise'}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"Who played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -384,7 +432,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 15,
|
||||
"id": "59baeb88-adfa-4c26-8334-fcbff3a98efb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -422,7 +470,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 16,
|
||||
"id": "47c64027-cf42-493a-9c76-2d10ba753728",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -434,7 +482,7 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (m:Movie {name:\"Top Gun\"})<-[:ACTED_IN]-(:Actor)\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (:Movie {name:\"Top Gun\"})<-[:ACTED_IN]-()\n",
|
||||
"RETURN count(*) AS numberOfActors\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'numberOfActors': 4}]\u001b[0m\n",
|
||||
@ -445,16 +493,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Four people played in Top Gun.'"
|
||||
"{'query': 'How many people played in Top Gun?',\n",
|
||||
" 'result': 'There were 4 actors who played in Top Gun.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"How many people played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"How many people played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -468,7 +517,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 17,
|
||||
"id": "6f9becc2-f579-45bf-9b50-2ce02bde92da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -483,7 +532,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 18,
|
||||
"id": "ff18e3e3-3402-4683-aec4-a19898f23ca1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -495,10 +544,11 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@ -506,16 +556,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
|
||||
"{'query': 'Who played in Top Gun?',\n",
|
||||
" 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, and Tom Cruise played in Top Gun.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"Who played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -530,7 +581,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 19,
|
||||
"id": "a20fa21e-fb85-41c4-aac0-53fb25e34604",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -546,7 +597,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 20,
|
||||
"id": "3ad7f6b8-543e-46e4-a3b2-40fa3e66e895",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -579,7 +630,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 21,
|
||||
"id": "53665d03-7afd-433c-bdd5-750127bfb152",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -594,7 +645,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 22,
|
||||
"id": "19e1a591-9c10-4d7b-aa36-a5e1b778a97b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -606,10 +657,11 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\n",
|
||||
"WHERE m.name = 'Top Gun'\n",
|
||||
"RETURN a.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@ -617,16 +669,17 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
|
||||
"{'query': 'Who played in Top Gun?',\n",
|
||||
" 'result': 'Anthony Edwards, Meg Ryan, Val Kilmer, Tom Cruise played in Top Gun.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in Top Gun?\")"
|
||||
"chain.invoke({\"query\": \"Who played in Top Gun?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -654,7 +707,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -9,6 +9,11 @@ from langchain_community.graphs.graph_store import GraphStore
|
||||
BASE_ENTITY_LABEL = "__Entity__"
|
||||
EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"]
|
||||
EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"]
|
||||
EXHAUSTIVE_SEARCH_LIMIT = 10000
|
||||
LIST_LIMIT = 128
|
||||
# Threshold for returning all available prop values in graph schema
|
||||
DISTINCT_VALUE_LIMIT = 10
|
||||
NL = "\n"
|
||||
|
||||
node_properties_query = """
|
||||
CALL apoc.meta.data()
|
||||
@ -56,7 +61,6 @@ def value_sanitize(d: Any) -> Any:
|
||||
results, can occupy significant context space and detract from
|
||||
the LLM's performance by introducing unnecessary noise and cost.
|
||||
"""
|
||||
LIST_LIMIT = 128
|
||||
if isinstance(d, dict):
|
||||
new_dict = {}
|
||||
for key, value in d.items():
|
||||
@ -135,6 +139,223 @@ def _get_rel_import_query(baseEntityLabel: bool) -> str:
|
||||
)
|
||||
|
||||
|
||||
def _enhanced_schema_cypher(
    label_or_type: str,
    properties: List[Dict[str, Any]],
    exhaustive: bool,
    is_relationship: bool = False,
) -> str:
    """Build a Cypher query that summarises property values of one label/type.

    Args:
        label_or_type: Node label, or relationship type when
            ``is_relationship`` is true.
        properties: Property descriptors; each must provide a ``"property"``
            (name) and a ``"type"`` key.
        exhaustive: If true, aggregate over every matched element (distinct
            values, min/max, distinct counts). If false, only the first five
            matched elements are inspected.
        is_relationship: Match ``()-[n:TYPE]->()`` instead of ``(n:LABEL)``.

    Returns:
        The query text. It returns a map named ``output`` keyed by property
        name. Properties whose type is not STRING, INTEGER, FLOAT, DATE or
        LIST are left out of the map.
    """
    # Backticks keep labels/types containing spaces or symbols valid, the
    # same way property names are quoted below.
    if is_relationship:
        match_clause = f"MATCH ()-[n:`{label_or_type}`]->()"
    else:
        match_clause = f"MATCH (n:`{label_or_type}`)"
    if not exhaustive:
        # Only inspect the first 5 matches instead of scanning everything
        match_clause += " WITH n LIMIT 5"

    with_clauses: List[str] = []
    output_dict: Dict[str, str] = {}
    for prop in properties:
        prop_name = prop["property"]
        prop_type = prop["type"]
        if prop_type == "STRING":
            # Values are truncated to 50 characters to keep the schema short
            with_clauses.append(
                f"collect(distinct substring(n.`{prop_name}`, 0, 50)) "
                f"AS `{prop_name}_values`"
            )
            if exhaustive:
                summary = (
                    f"values:`{prop_name}_values`[..{DISTINCT_VALUE_LIMIT}],"
                    f" distinct_count: size(`{prop_name}_values`)"
                )
            else:
                summary = f"values: `{prop_name}_values`"
        elif prop_type in ("INTEGER", "FLOAT", "DATE"):
            if exhaustive:
                with_clauses.extend(
                    [
                        f"min(n.`{prop_name}`) AS `{prop_name}_min`",
                        f"max(n.`{prop_name}`) AS `{prop_name}_max`",
                        f"count(distinct n.`{prop_name}`) "
                        f"AS `{prop_name}_distinct`",
                    ]
                )
                summary = (
                    f"min: toString(`{prop_name}_min`), "
                    f"max: toString(`{prop_name}_max`), "
                    f"distinct_count: `{prop_name}_distinct`"
                )
            else:
                with_clauses.append(
                    f"collect(distinct toString(n.`{prop_name}`)) "
                    f"AS `{prop_name}_values`"
                )
                summary = f"values: `{prop_name}_values`"
        elif prop_type == "LIST":
            with_clauses.append(
                f"min(size(n.`{prop_name}`)) AS `{prop_name}_size_min`, "
                f"max(size(n.`{prop_name}`)) AS `{prop_name}_size_max`"
            )
            summary = (
                f"min_size: `{prop_name}_size_min`, "
                f"max_size: `{prop_name}_size_max`"
            )
        else:
            # No summary is defined for other types (e.g. BOOLEAN); skip
            # them instead of failing on a missing return clause.
            continue
        output_dict[prop_name] = "{" + summary + "}"

    return_clause = (
        "RETURN {"
        + ", ".join(f"`{k}`: {v}" for k, v in output_dict.items())
        + "} AS output"
    )
    if not with_clauses:
        # "WITH" followed by nothing is invalid Cypher; without aggregation
        # every match would yield a row, so keep a single one.
        return "\n".join([match_clause, return_clause + " LIMIT 1"])

    with_clause = "WITH " + ",\n ".join(with_clauses)
    # Combine all parts of the Cypher query
    return "\n".join([match_clause, with_clause, return_clause])
|
||||
|
||||
|
||||
def _enhanced_property_example(prop: Dict[str, Any]) -> Optional[str]:
    """Return the example/statistics text for one enhanced-schema property.

    Returns ``None`` when the property should be left out of the schema
    (lists whose minimum size exceeds ``LIST_LIMIT``, treated as embeddings),
    and an empty string when no example information is available.
    """
    prop_type = prop["type"]
    values = prop.get("values")
    if prop_type == "STRING":
        if not values:
            return ""
        distinct_count = prop.get("distinct_count")
        # A missing distinct count means the values were only sampled, so a
        # complete list of options cannot be claimed.
        if distinct_count is None or distinct_count > DISTINCT_VALUE_LIMIT:
            return f'Example: "{values[0].replace(NL, " ")}"'
        # At most DISTINCT_VALUE_LIMIT distinct values: list them all
        options = [el.replace(NL, " ") for el in values]
        return f"Available options: {options}"
    if prop_type in ("INTEGER", "FLOAT", "DATE"):
        if prop.get("min") is not None:  # min/max statistics are available
            return f'Min: {prop["min"]}, Max: {prop["max"]}'
        # Otherwise fall back to a single sampled value, if any
        return f'Example: "{values[0]}"' if values else ""
    if prop_type == "LIST":
        min_size = prop.get("min_size")
        if min_size is None:
            # Size unknown (no statistics returned for this property)
            return ""
        if min_size > LIST_LIMIT:
            # Skip embeddings
            return None
        return f'Min Size: {min_size}, Max Size: {prop["max_size"]}'
    return ""


def _format_enhanced_props(props_by_name: Dict[str, List[Dict[str, Any]]]) -> List[str]:
    """Render a markdown-style bullet list of labels/types and their properties."""
    lines: List[str] = []
    for name, properties in props_by_name.items():
        lines.append(f"- **{name}**")
        for prop in properties:
            example = _enhanced_property_example(prop)
            if example is None:
                continue
            lines.append(f" - `{prop['property']}: {prop['type']}` {example}")
    return lines


def _format_basic_props(props_by_name: Dict[str, List[Dict[str, Any]]]) -> List[str]:
    """Render ``Name {prop: TYPE, ...}`` lines, one per label/type."""
    lines: List[str] = []
    for name, props in props_by_name.items():
        props_str = ", ".join(f"{prop['property']}: {prop['type']}" for prop in props)
        lines.append(f"{name} {{{props_str}}}")
    return lines


def _format_schema(schema: Dict, is_enhanced: bool) -> str:
    """Render the structured schema as the text given to the LLM.

    Args:
        schema: Mapping with ``"node_props"`` and ``"rel_props"`` (label/type
            to list of property dicts) and ``"relationships"`` (list of dicts
            with ``"start"``, ``"type"`` and ``"end"``).
        is_enhanced: If true, each property is listed on its own line together
            with example values or min/max statistics when present; otherwise
            only property names and types are listed.

    Returns:
        The node properties, relationship properties and relationship
        patterns as newline-separated sections.
    """
    if is_enhanced:
        formatted_node_props = _format_enhanced_props(schema["node_props"])
        formatted_rel_props = _format_enhanced_props(schema["rel_props"])
    else:
        formatted_node_props = _format_basic_props(schema["node_props"])
        formatted_rel_props = _format_basic_props(schema["rel_props"])

    # Format relationships
    formatted_rels = [
        f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
        for el in schema["relationships"]
    ]

    return "\n".join(
        [
            "Node properties:",
            "\n".join(formatted_node_props),
            "Relationship properties:",
            "\n".join(formatted_rel_props),
            "The relationships:",
            "\n".join(formatted_rels),
        ]
    )
|
||||
|
||||
|
||||
class Neo4jGraph(GraphStore):
|
||||
"""Neo4j database wrapper for various graph operations.
|
||||
|
||||
@ -151,6 +372,8 @@ class Neo4jGraph(GraphStore):
|
||||
embedding-like properties from database responses. Default is False.
|
||||
refresh_schema (bool): A flag whether to refresh schema information
|
||||
at initialization. Default is True.
|
||||
enhanced_schema (bool): A flag whether to scan the database for
|
||||
example values and use them in the graph schema. Default is False.
|
||||
driver_config (Dict): Configuration passed to Neo4j Driver.
|
||||
|
||||
*Security note*: Make sure that the database connection uses credentials
|
||||
@ -176,6 +399,7 @@ class Neo4jGraph(GraphStore):
|
||||
refresh_schema: bool = True,
|
||||
*,
|
||||
driver_config: Optional[Dict] = None,
|
||||
enhanced_schema: bool = False,
|
||||
) -> None:
|
||||
"""Create a new Neo4j graph wrapper instance."""
|
||||
try:
|
||||
@ -203,6 +427,7 @@ class Neo4jGraph(GraphStore):
|
||||
self._database = database
|
||||
self.timeout = timeout
|
||||
self.sanitize = sanitize
|
||||
self._enhanced_schema = enhanced_schema
|
||||
self.schema: str = ""
|
||||
self.structured_schema: Dict[str, Any] = {}
|
||||
# Verify connection
|
||||
@ -300,37 +525,48 @@ class Neo4jGraph(GraphStore):
|
||||
"metadata": {"constraint": constraint, "index": index},
|
||||
}
|
||||
|
||||
# Format node properties
|
||||
formatted_node_props = []
|
||||
for el in node_properties:
|
||||
props_str = ", ".join(
|
||||
[f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
|
||||
if self._enhanced_schema:
|
||||
schema_counts = self.query(
|
||||
"CALL apoc.meta.graphSample() YIELD nodes, relationships "
|
||||
"RETURN nodes, [rel in relationships | {name:apoc.any.property"
|
||||
"(rel, 'type'), count: apoc.any.property(rel, 'count')}]"
|
||||
" AS relationships"
|
||||
)
|
||||
formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
|
||||
# Update node info
|
||||
for node in schema_counts[0]["nodes"]:
|
||||
# Skip bloom labels
|
||||
if node["name"] in EXCLUDED_LABELS:
|
||||
continue
|
||||
node_props = self.structured_schema["node_props"][node["name"]]
|
||||
enhanced_cypher = _enhanced_schema_cypher(
|
||||
node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT
|
||||
)
|
||||
enhanced_info = self.query(enhanced_cypher)[0]["output"]
|
||||
for prop in node_props:
|
||||
if prop["property"] in enhanced_info:
|
||||
prop.update(enhanced_info[prop["property"]])
|
||||
# Update rel info
|
||||
for rel in schema_counts[0]["relationships"]:
|
||||
# Skip bloom labels
|
||||
if rel["name"] in EXCLUDED_RELS:
|
||||
continue
|
||||
rel_props = self.structured_schema["rel_props"].get(rel["name"])
|
||||
if not rel_props:
|
||||
continue
|
||||
enhanced_cypher = _enhanced_schema_cypher(
|
||||
rel["name"],
|
||||
rel_props,
|
||||
rel["count"] < EXHAUSTIVE_SEARCH_LIMIT,
|
||||
is_relationship=True,
|
||||
)
|
||||
enhanced_info = self.query(enhanced_cypher)[0]["output"]
|
||||
for prop in rel_props:
|
||||
if prop["property"] in enhanced_info:
|
||||
prop.update(enhanced_info[prop["property"]])
|
||||
|
||||
# Format relationship properties
|
||||
formatted_rel_props = []
|
||||
for el in rel_properties:
|
||||
props_str = ", ".join(
|
||||
[f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
|
||||
)
|
||||
formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
|
||||
schema = _format_schema(self.structured_schema, self._enhanced_schema)
|
||||
|
||||
# Format relationships
|
||||
formatted_rels = [
|
||||
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
|
||||
]
|
||||
|
||||
self.schema = "\n".join(
|
||||
[
|
||||
"Node properties are the following:",
|
||||
",".join(formatted_node_props),
|
||||
"Relationship properties are the following:",
|
||||
",".join(formatted_rel_props),
|
||||
"The relationships are the following:",
|
||||
",".join(formatted_rels),
|
||||
]
|
||||
)
|
||||
self.schema = schema
|
||||
|
||||
def add_graph_documents(
|
||||
self,
|
||||
|
@ -291,3 +291,45 @@ def test_driver_config() -> None:
|
||||
driver_config={"max_connection_pool_size": 1},
|
||||
)
|
||||
graph.query("RETURN 'foo'")
|
||||
|
||||
|
||||
def test_enhanced_schema() -> None:
    """Test that the enhanced schema adds example values to node properties."""
    # Connection settings come from the environment; all three are required.
    url = os.environ.get("NEO4J_URI")
    username = os.environ.get("NEO4J_USERNAME")
    password = os.environ.get("NEO4J_PASSWORD")
    assert url is not None
    assert username is not None
    assert password is not None

    graph = Neo4jGraph(
        url=url, username=username, password=password, enhanced_schema=True
    )
    # Start from an empty database so only the test data shapes the schema
    graph.query("MATCH (n) DETACH DELETE n")
    graph.add_graph_documents(test_data)
    # Re-read the schema now that the test data has been inserted
    graph.refresh_schema()
    # With enhanced_schema=True each property also carries the collected
    # values and their distinct count.
    expected_output = {
        "node_props": {
            "foo": [
                {
                    "property": "id",
                    "type": "STRING",
                    "values": ["foo"],
                    "distinct_count": 1,
                }
            ],
            "bar": [
                {
                    "property": "id",
                    "type": "STRING",
                    "values": ["bar"],
                    "distinct_count": 1,
                }
            ],
        },
        "rel_props": {},
        "relationships": [{"start": "foo", "type": "REL", "end": "bar"}],
    }
    # remove metadata portion of schema
    del graph.structured_schema["metadata"]
    assert graph.structured_schema == expected_output
|
||||
|
Loading…
Reference in New Issue
Block a user