From a50eabbd48662f4fe73668246b06e1989648f273 Mon Sep 17 00:00:00 2001
From: Jordy Jackson Antunes da Rocha
Date: Mon, 1 Jul 2024 14:33:51 -0300
Subject: [PATCH] experimental: LLMGraphTransformer add missing conditional
 adding restrictions to prompts for LLM that do not support function calling
 (#22793)

- Description: Modified the prompt created by the function
`create_unstructured_prompt` (which is called for LLMs that do not support
function calling) by adding conditional checks that verify whether
restrictions on entity types and rel_types should be added to the prompt. If
the user provides a sufficiently large text, the current prompt **may** fail
to produce results with some LLMs. I first saw this issue when I implemented
a custom LLM class that did not support function calling and used Gemini 1.5
Pro, but I was able to replicate it using OpenAI models.

Loading a sufficiently large text:

```python
from langchain_openai import ChatOpenAI, OpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document

with open("texto-longo.txt", "r") as file:
    full_text = file.read()

partial_text = full_text[:4000]  # cropped to fit the GPT-3.5 context window

documents = [Document(page_content=partial_text)]
```

and using the chat class (which supports function calling):

```python
chat_openai = ChatOpenAI(model="gpt-3.5-turbo", model_kwargs={"seed": 42})
chat_gpt35_transformer = LLMGraphTransformer(llm=chat_openai)
graph_from_chat_gpt35 = chat_gpt35_transformer.convert_to_graph_documents(documents)
```

it works:

```
>>> print(graph_from_chat_gpt35[0].nodes)
[Node(id="Jesu, Joy of Man's Desiring", type='Music'), Node(id='Godel', type='Person'), Node(id='Johann Sebastian Bach', type='Person'), Node(id='clever way of encoding the complicated expressions as numbers', type='Concept')]
```

But if you use the non-chat LLM class (which does not support function
calling):

```python
openai = OpenAI(
    model="gpt-3.5-turbo-instruct",
    max_tokens=1000,
)
gpt35_transformer = LLMGraphTransformer(llm=openai)
graph_from_gpt35 = gpt35_transformer.convert_to_graph_documents(documents)
```

it uses the problematic prompt and sometimes produces no result at all:

```
>>> print(graph_from_gpt35[0].nodes)
[]
```

After implementing the changes, I was able to use both classes more
consistently:

```
>>> chat_gpt35_transformer = LLMGraphTransformer(llm=chat_openai)
>>> graph_from_chat_gpt35 = chat_gpt35_transformer.convert_to_graph_documents(documents)
>>> print(graph_from_chat_gpt35[0].nodes)
[Node(id="Jesu, Joy Of Man'S Desiring", type='Music'), Node(id='Johann Sebastian Bach', type='Person'), Node(id='Godel', type='Person')]
>>> gpt35_transformer = LLMGraphTransformer(llm=openai)
>>> graph_from_gpt35 = gpt35_transformer.convert_to_graph_documents(documents)
>>> print(graph_from_gpt35[0].nodes)
[Node(id='I', type='Pronoun'), Node(id="JESU, JOY OF MAN'S DESIRING", type='Song'), Node(id='larger memory', type='Memory'), Node(id='this nice tree structure', type='Structure'), Node(id='how you can do it all with the numbers', type='Process'), Node(id='JOHANN SEBASTIAN BACH', type='Composer'), Node(id='type of structure', type='Characteristic'), Node(id='that', type='Pronoun'), Node(id='we', type='Pronoun'), Node(id='worry', type='Verb')]
```

The results are still a little inconsistent because the GPT-3.5 model may
produce incomplete JSON due to the token limit, but that could be solved (or
mitigated) by checking for a complete JSON when parsing it.
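As a minimal sketch of that mitigation (not part of this patch; the helper
name and retry policy are hypothetical), the parsing step could treat output
that fails `json.loads` as truncated and signal the caller to retry or skip:

```python
import json
from typing import Any, Optional


def parse_if_complete(raw_output: str) -> Optional[Any]:
    """Hypothetical helper: return the parsed value only when the model
    emitted syntactically complete JSON; return None on truncation."""
    try:
        return json.loads(raw_output)
    except json.JSONDecodeError:
        # Output cut off by max_tokens usually fails to parse here; the
        # caller could retry with a larger token budget or shorter input.
        return None
```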
---
 .../graph_transformers/llm.py                 | 39 ++++++++++++-------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/libs/experimental/langchain_experimental/graph_transformers/llm.py b/libs/experimental/langchain_experimental/graph_transformers/llm.py
index 16023ab452d..8886a550684 100644
--- a/libs/experimental/langchain_experimental/graph_transformers/llm.py
+++ b/libs/experimental/langchain_experimental/graph_transformers/llm.py
@@ -242,22 +242,31 @@ def create_unstructured_prompt(
     system_message = SystemMessage(content=system_prompt)
     parser = JsonOutputParser(pydantic_object=UnstructuredRelation)
 
+    human_string_parts = [
+        "Based on the following example, extract entities and "
+        "relations from the provided text.\n\n",
+        "Use the following entity types, don't use other entity "
+        "that is not defined below:"
+        "# ENTITY TYPES:"
+        "{node_labels}"
+        if node_labels
+        else "",
+        "Use the following relation types, don't use other relation "
+        "that is not defined below:"
+        "# RELATION TYPES:"
+        "{rel_types}"
+        if rel_types
+        else "",
+        "Below are a number of examples of text and their extracted "
+        "entities and relationships."
+        "{examples}\n"
+        "For the following text, extract entities and relations as "
+        "in the provided example."
+        "{format_instructions}\nText: {input}",
+    ]
+    human_prompt_string = "\n".join(filter(None, human_string_parts))
     human_prompt = PromptTemplate(
-        template="""Based on the following example, extract entities and
-relations from the provided text.\n\n
-Use the following entity types, don't use other entity that is not defined below:
-# ENTITY TYPES:
-{node_labels}
-
-Use the following relation types, don't use other relation that is not defined below:
-# RELATION TYPES:
-{rel_types}
-
-Below are a number of examples of text and their extracted entities and relationships.
-{examples}
-
-For the following text, extract entities and relations as in the provided example.
-{format_instructions}\nText: {input}""",
+        template=human_prompt_string,
         input_variables=["input"],
         partial_variables={
             "format_instructions": parser.get_format_instructions(),