diff --git a/docs/docs/integrations/graphs/kuzu_db.ipynb b/docs/docs/integrations/graphs/kuzu_db.ipynb index b8a9365326e..dde4dddc6c6 100644 --- a/docs/docs/integrations/graphs/kuzu_db.ipynb +++ b/docs/docs/integrations/graphs/kuzu_db.ipynb @@ -7,11 +7,12 @@ "source": [ "# Kuzu\n", "\n", - ">[Kùzu](https://kuzudb.com) is an in-process property graph database management system. \n", - ">\n", - ">This notebook shows how to use LLMs to provide a natural language interface to [Kùzu](https://kuzudb.com) database with `Cypher` graph query language.\n", - ">\n", - ">[Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)) is a declarative graph query language that allows for expressive and efficient data querying in a property graph." + ">[Kùzu](https://kuzudb.com) is an embeddable property graph database management system built for query speed and scalability.\n", + "> \n", + "> Kùzu has a permissive (MIT) open source license and implements [Cypher](https://en.wikipedia.org/wiki/Cypher_(query_language)), a declarative graph query language that allows for expressive and efficient data querying in a property graph.\n", + "> It uses columnar storage and its query processor contains novel join algorithms that allow it to scale to very large graphs without sacrificing query performance.\n", + "> \n", + "> This notebook shows how to use LLMs to provide a natural language interface to a [Kùzu](https://kuzudb.com) database using Cypher." 
] }, { @@ -21,7 +22,8 @@ "source": [ "## Setting up\n", "\n", - "Install the python package:\n", + "Kùzu is an embedded database (it runs in-process), so there are no servers to manage.\n", + "Simply install it via its Python package:\n", "\n", "```bash\n", "pip install kuzu\n", @@ -32,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -52,16 +54,16 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -84,16 +86,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -132,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -143,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -152,11 +154,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)" + "chain = KuzuQAChain.from_llm(\n", + " llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n", + " graph=graph,\n", + " verbose=True,\n", + ")" ] }, { @@ -166,12 +172,13 @@ "source": [ "## Refresh graph schema information\n", "\n", - "If the schema of database changes, you can refresh the schema information needed to generate Cypher statements." + "If the schema of the database changes, you can refresh the schema information needed to generate Cypher statements.\n", + "You can also display the schema of the Kùzu graph as demonstrated below." 
] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -180,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -205,78 +212,7 @@ "source": [ "## Querying the graph\n", "\n", - "We can now use the `KuzuQAChain` to ask question of the graph" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new chain...\u001b[0m\n", - "Generated Cypher:\n", - "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'}) RETURN p.name\u001b[0m\n", - "Full Context:\n", - "\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Al Pacino and Robert De Niro both played in The Godfather: Part II.'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chain.run(\"Who played in The Godfather: Part II?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new chain...\u001b[0m\n", - "Generated Cypher:\n", - "\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n", - "RETURN m.name\u001b[0m\n", - "Full Context:\n", - "\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Robert De Niro played in The Godfather: Part II.'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chain.run(\"Robert De Niro played in which movies?\")" + "We can now use 
the `KuzuQAChain` to ask questions of the graph." ] }, { @@ -290,12 +226,13 @@ "text": [ "\n", "\n", - "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n", "Generated Cypher:\n", - "\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n", - "RETURN p.birthDate\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n", + "WHERE m.name = 'The Godfather: Part II'\n", + "RETURN p.name\u001b[0m\n", "Full Context:\n", - "\u001b[32;1m\u001b[1;3m[{'p.birthDate': '1943-08-17'}]\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] @@ -303,7 +240,8 @@ { "data": { "text/plain": [ - "'Robert De Niro was born on August 17, 1943.'" + "{'query': 'Who acted in The Godfather: Part II?',\n", + " 'result': 'Al Pacino, Robert De Niro acted in The Godfather: Part II.'}" ] }, "execution_count": 11, @@ -312,7 +250,7 @@ } ], "source": [ - "chain.run(\"Robert De Niro is born in which year?\")" + "chain.invoke(\"Who acted in The Godfather: Part II?\")" ] }, { @@ -326,13 +264,87 @@ "text": [ "\n", "\n", - "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n", "Generated Cypher:\n", - "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie{name:'The Godfather: Part II'})\n", - "WITH p, m, p.birthDate AS birthDate\n", - "ORDER BY birthDate ASC\n", - "LIMIT 1\n", - "RETURN p.name\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie)\n", + "WHERE p.name = 'Robert De Niro'\n", + "RETURN m.name\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'query': 'Robert De Niro played in which movies?',\n", + " 'result': 'Robert De Niro played in The 
Godfather: Part II.'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke(\"Robert De Niro played in which movies?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'Godfather: Part II'})\n", + "RETURN count(*)\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 0}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'query': 'How many actors played in the Godfather: Part II?',\n", + " 'result': \"I don't know the answer.\"}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke(\"How many actors played in the Godfather: Part II?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'})\n", + "RETURN p.name\n", + "ORDER BY p.birthDate ASC\n", + "LIMIT 1\u001b[0m\n", "Full Context:\n", "\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}]\u001b[0m\n", "\n", @@ -342,16 +354,114 @@ { "data": { "text/plain": [ - "'The oldest actor who played in The Godfather: Part II is Al Pacino.'" + "{'query': 'Who is the oldest actor who played in The Godfather: Part II?',\n", + " 'result': 'Al Pacino is the oldest actor who played in The Godfather: Part II.'}" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"chain.run(\"Who is the oldest actor who played in The Godfather: Part II?\")" + "chain.invoke(\"Who is the oldest actor who played in The Godfather: Part II?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use separate LLMs for Cypher and answer generation\n", + "\n", + "You can specify `cypher_llm` and `qa_llm` separately to use different LLMs for Cypher generation and answer generation." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead.\n", + " warn_deprecated(\n" + ] + } + ], + "source": [ + "chain = KuzuQAChain.from_llm(\n", + " cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n", + " qa_llm=ChatOpenAI(temperature=0, model=\"gpt-4\"),\n", + " graph=graph,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new KuzuQAChain chain...\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. 
Use invoke instead.\n", + " warn_deprecated(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (:Person)-[:ActedIn]->(:Movie {name: 'The Godfather: Part II'})\n", + "RETURN count(*)\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'COUNT_STAR()': 2}]\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/prrao/code/langchain/.venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n", + " warn_deprecated(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'query': 'How many actors played in The Godfather: Part II?',\n", + " 'result': 'Two actors played in The Godfather: Part II.'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke(\"How many actors played in The Godfather: Part II?\")" ] } ], @@ -371,7 +481,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/chains/graph_qa/kuzu.py b/libs/community/langchain_community/chains/graph_qa/kuzu.py index 752b7c20df4..2f7b0914c81 100644 --- a/libs/community/langchain_community/chains/graph_qa/kuzu.py +++ b/libs/community/langchain_community/chains/graph_qa/kuzu.py @@ -92,15 +92,36 @@ class KuzuQAChain(Chain): @classmethod def from_llm( cls, - llm: BaseLanguageModel, + llm: Optional[BaseLanguageModel] = None, *, qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT, cypher_prompt: BasePromptTemplate = KUZU_GENERATION_PROMPT, + cypher_llm: Optional[BaseLanguageModel] = None, + qa_llm: 
Optional[BaseLanguageModel] = None, **kwargs: Any, ) -> KuzuQAChain: """Initialize from LLM.""" - qa_chain = LLMChain(llm=llm, prompt=qa_prompt) - cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt) + if not cypher_llm and not llm: + raise ValueError("Either `llm` or `cypher_llm` parameters must be provided") + if not qa_llm and not llm: + raise ValueError( + "Either `llm` or `qa_llm` parameters must be provided along with" + " `cypher_llm`" + ) + if cypher_llm and qa_llm and llm: + raise ValueError( + "You can specify up to two of 'cypher_llm', 'qa_llm'" + ", and 'llm', but not all three simultaneously." + ) + + qa_chain = LLMChain( + llm=qa_llm or llm, # type: ignore[arg-type] + prompt=qa_prompt, + ) + cypher_generation_chain = LLMChain( + llm=cypher_llm or llm, # type: ignore[arg-type] + prompt=cypher_prompt, + ) return cls( qa_chain=qa_chain, diff --git a/libs/community/langchain_community/chains/graph_qa/prompts.py b/libs/community/langchain_community/chains/graph_qa/prompts.py index a4b5db9583a..ec4d9a4c750 100644 --- a/libs/community/langchain_community/chains/graph_qa/prompts.py +++ b/libs/community/langchain_community/chains/graph_qa/prompts.py @@ -75,13 +75,11 @@ NGQL_GENERATION_PROMPT = PromptTemplate( KUZU_EXTRA_INSTRUCTIONS = """ Instructions: - Generate the Kùzu dialect of Cypher with the following rules in mind: - -1. Do not use a `WHERE EXISTS` clause to check the existence of a property. -2. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`. -3. Do not include any notes or comments even if the statement does not produce the expected result. -```\n""" +1. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`. +2. Do not include triple backticks ``` in your response. Return only Cypher. +3. Do not return any notes or comments in your response. +\n""" KUZU_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace( "Generate Cypher", "Generate Kùzu Cypher"