mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-31 08:32:32 +00:00
[community][graph]: Update KuzuQAChain and docs (#21218)
This PR makes some small updates for `KuzuQAChain` for graph QA. - Updated Cypher generation prompt (we now support `WHERE EXISTS`) and generalize it more - Support different LLMs for Cypher generation and QA - Update docs and examples
This commit is contained in:
parent
752b1e85f8
commit
63c3a0e56c
@ -7,11 +7,12 @@
|
||||
"source": [
|
||||
"# Kuzu\n",
|
||||
"\n",
|
||||
">[Kùzu](https://kuzudb.com) is an in-process property graph database management system. \n",
|
||||
">\n",
|
||||
">This notebook shows how to use LLMs to provide a natural language interface to [Kùzu](https://kuzudb.com) database with `Cypher` graph query language.\n",
|
||||
">\n",
|
||||
">[Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)) is a declarative graph query language that allows for expressive and efficient data querying in a property graph."
|
||||
">[Kùzu](https://kuzudb.com) is an embeddable property graph database management system built for query speed and scalability.\n",
|
||||
"> \n",
|
||||
"> Kùzu has a permissive (MIT) open source license and implements [Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)), a declarative graph query language that allows for expressive and efficient data querying in a property graph.\n",
|
||||
"> It uses columnar storage and its query processor contains novel join algorithms that allow it to scale to very large graphs without sacrificing query performance.\n",
|
||||
"> \n",
|
||||
"> This notebook shows how to use LLMs to provide a natural language interface to [Kùzu](https://kuzudb.com) database with Cypher."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -21,7 +22,8 @@
|
||||
"source": [
|
||||
"## Setting up\n",
|
||||
"\n",
|
||||
"Install the python package:\n",
|
||||
"Kùzu is an embedded database (it runs in-process), so there are no servers to manage.\n",
|
||||
"Simply install it via its Python package:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install kuzu\n",
|
||||
@ -32,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -52,16 +54,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<kuzu.query_result.QueryResult at 0x1066ff410>"
|
||||
"<kuzu.query_result.QueryResult at 0x103a72290>"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -84,16 +86,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<kuzu.query_result.QueryResult at 0x107016210>"
|
||||
"<kuzu.query_result.QueryResult at 0x103a9e750>"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -132,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -143,7 +145,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -152,11 +154,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
|
||||
"chain = KuzuQAChain.from_llm(\n",
|
||||
" llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
|
||||
" graph=graph,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -166,12 +172,13 @@
|
||||
"source": [
|
||||
"## Refresh graph schema information\n",
|
||||
"\n",
|
||||
"If the schema of database changes, you can refresh the schema information needed to generate Cypher statements."
|
||||
"If the schema of database changes, you can refresh the schema information needed to generate Cypher statements.\n",
|
||||
"You can also display the schema of the Kùzu graph as demonstrated below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -180,7 +187,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -205,78 +212,7 @@
|
||||
"source": [
|
||||
"## Querying the graph\n",
|
||||
"\n",
|
||||
"We can now use the `KuzuQAChain` to ask question of the graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'}) RETURN p.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Al Pacino and Robert De Niro both played in The Godfather: Part II.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who played in The Godfather: Part II?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n",
|
||||
"RETURN m.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Robert De Niro played in The Godfather: Part II.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Robert De Niro played in which movies?\")"
|
||||
"We can now use the `KuzuQAChain` to ask questions of the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -290,12 +226,13 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n",
|
||||
"RETURN p.birthDate\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n",
|
||||
"WHERE m.name = 'The Godfather: Part II'\n",
|
||||
"RETURN p.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'p.birthDate': '1943-08-17'}]\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@ -303,7 +240,8 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Robert De Niro was born on August 17, 1943.'"
|
||||
"{'query': 'Who acted in The Godfather: Part II?',\n",
|
||||
" 'result': 'Al Pacino, Robert De Niro acted in The Godfather: Part II.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
@ -312,7 +250,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Robert De Niro is born in which year?\")"
|
||||
"chain.invoke(\"Who acted in The Godfather: Part II?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -326,13 +264,87 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie{name:'The Godfather: Part II'})\n",
|
||||
"WITH p, m, p.birthDate AS birthDate\n",
|
||||
"ORDER BY birthDate ASC\n",
|
||||
"LIMIT 1\n",
|
||||
"RETURN p.name\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n",
|
||||
"WHERE p.name = 'Robert De Niro'\n",
|
||||
"RETURN m.name\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'Robert De Niro played in which movies?',\n",
|
||||
" 'result': 'Robert De Niro played in The Godfather: Part II.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"Robert De Niro played in which movies?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'Godfather: Part II'})\n",
|
||||
"RETURN count(*)\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 0}]\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'How many actors played in the Godfather: Part II?',\n",
|
||||
" 'result': \"I don't know the answer.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"How many actors played in the Godfather: Part II?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n",
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'})\n",
|
||||
"RETURN p.name\n",
|
||||
"ORDER BY p.birthDate ASC\n",
|
||||
"LIMIT 1\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}]\u001b[0m\n",
|
||||
"\n",
|
||||
@ -342,16 +354,114 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The oldest actor who played in The Godfather: Part II is Al Pacino.'"
|
||||
"{'query': 'Who is the oldest actor who played in The Godfather: Part II?',\n",
|
||||
" 'result': 'Al Pacino is the oldest actor who played in The Godfather: Part II.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(\"Who is the oldest actor who played in The Godfather: Part II?\")"
|
||||
"chain.invoke(\"Who is the oldest actor who played in The Godfather: Part II?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use separate LLMs for Cypher and answer generation\n",
|
||||
"\n",
|
||||
"You can specify `cypher_llm` and `qa_llm` separately to use different LLMs for Cypher generation and answer generation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n",
|
||||
" warn_deprecated(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain = KuzuQAChain.from_llm(\n",
|
||||
" cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
|
||||
" qa_llm=ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
|
||||
" graph=graph,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
|
||||
" warn_deprecated(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generated Cypher:\n",
|
||||
"\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'The Godfather: Part II'})\n",
|
||||
"RETURN count(*)\u001b[0m\n",
|
||||
"Full Context:\n",
|
||||
"\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 2}]\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
|
||||
" warn_deprecated(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'How many actors played in The Godfather: Part II?',\n",
|
||||
" 'result': 'Two actors played in The Godfather: Part II.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"How many actors played in The Godfather: Part II?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -371,7 +481,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -92,15 +92,36 @@ class KuzuQAChain(Chain):
|
||||
@classmethod
|
||||
def from_llm(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
llm: Optional[BaseLanguageModel] = None,
|
||||
*,
|
||||
qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
|
||||
cypher_prompt: BasePromptTemplate = KUZU_GENERATION_PROMPT,
|
||||
cypher_llm: Optional[BaseLanguageModel] = None,
|
||||
qa_llm: Optional[BaseLanguageModel] = None,
|
||||
**kwargs: Any,
|
||||
) -> KuzuQAChain:
|
||||
"""Initialize from LLM."""
|
||||
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
|
||||
cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)
|
||||
if not cypher_llm and not llm:
|
||||
raise ValueError("Either `llm` or `cypher_llm` parameters must be provided")
|
||||
if not qa_llm and not llm:
|
||||
raise ValueError(
|
||||
"Either `llm` or `qa_llm` parameters must be provided along with"
|
||||
" `cypher_llm`"
|
||||
)
|
||||
if cypher_llm and qa_llm and llm:
|
||||
raise ValueError(
|
||||
"You can specify up to two of 'cypher_llm', 'qa_llm'"
|
||||
", and 'llm', but not all three simultaneously."
|
||||
)
|
||||
|
||||
qa_chain = LLMChain(
|
||||
llm=qa_llm or llm, # type: ignore[arg-type]
|
||||
prompt=qa_prompt,
|
||||
)
|
||||
cypher_generation_chain = LLMChain(
|
||||
llm=cypher_llm or llm, # type: ignore[arg-type]
|
||||
prompt=cypher_prompt,
|
||||
)
|
||||
|
||||
return cls(
|
||||
qa_chain=qa_chain,
|
||||
|
@ -75,13 +75,11 @@ NGQL_GENERATION_PROMPT = PromptTemplate(
|
||||
|
||||
KUZU_EXTRA_INSTRUCTIONS = """
|
||||
Instructions:
|
||||
|
||||
Generate the Kùzu dialect of Cypher with the following rules in mind:
|
||||
|
||||
1. Do not use a `WHERE EXISTS` clause to check the existence of a property.
|
||||
2. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
|
||||
3. Do not include any notes or comments even if the statement does not produce the expected result.
|
||||
```\n"""
|
||||
1. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
|
||||
2. Do not include triple backticks ``` in your response. Return only Cypher.
|
||||
3. Do not return any notes or comments in your response.
|
||||
\n"""
|
||||
|
||||
KUZU_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
|
||||
"Generate Cypher", "Generate Kùzu Cypher"
|
||||
|
Loading…
Reference in New Issue
Block a user