""" Question answering over an RDF or OWL graph using SPARQL. """ from __future__ import annotations from typing import Any, Dict, List, Optional from langchain.chains.base import Chain from langchain.chains.llm import LLMChain from langchain_core.callbacks.manager import CallbackManagerForChainRun from langchain_core.language_models import BaseLanguageModel from langchain_core.prompts.base import BasePromptTemplate from langchain_core.prompts.prompt import PromptTemplate from langchain_core.pydantic_v1 import Field from langchain_community.chains.graph_qa.prompts import SPARQL_QA_PROMPT from langchain_community.graphs import NeptuneRdfGraph INTERMEDIATE_STEPS_KEY = "intermediate_steps" SPARQL_GENERATION_TEMPLATE = """ Task: Generate a SPARQL SELECT statement for querying a graph database. For instance, to find all email addresses of John Doe, the following query in backticks would be suitable: ``` PREFIX foaf: SELECT ?email WHERE {{ ?person foaf:name "John Doe" . ?person foaf:mbox ?email . }} ``` Instructions: Use only the node types and properties provided in the schema. Do not use any node types and properties that are not explicitly provided. Include all necessary prefixes. Examples: Schema: {schema} Note: Be as concise as possible. Do not include any explanations or apologies in your responses. Do not respond to any questions that ask for anything else than for you to construct a SPARQL query. Do not include any text except the SPARQL query generated. The question is: {prompt}""" SPARQL_GENERATION_PROMPT = PromptTemplate( input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE ) def extract_sparql(query: str) -> str: """Extract SPARQL code from a text. Args: query: Text to extract SPARQL code from. Returns: SPARQL code extracted from the text. """ query = query.strip() querytoks = query.split("```") if len(querytoks) == 3: query = querytoks[1] if query.startswith("sparql"): query = query[6:] elif query.startswith("") and query.endswith(""): query = query[8:-9] return query class NeptuneSparqlQAChain(Chain): """Chain for question-answering against a Neptune graph by generating SPARQL statements. *Security note*: Make sure that the database connection uses credentials that are narrowly-scoped to only include necessary permissions. Failure to do so may result in data corruption or loss, since the calling code may attempt commands that would result in deletion, mutation of data if appropriately prompted or reading sensitive data if such data is present in the database. The best way to guard against such negative outcomes is to (as appropriate) limit the permissions granted to the credentials used with this tool. See https://python.langchain.com/docs/security for more information. Example: .. code-block:: python chain = NeptuneSparqlQAChain.from_llm( llm=llm, graph=graph ) response = chain.invoke(query) """ graph: NeptuneRdfGraph = Field(exclude=True) sparql_generation_chain: LLMChain qa_chain: LLMChain input_key: str = "query" #: :meta private: output_key: str = "result" #: :meta private: top_k: int = 10 return_intermediate_steps: bool = False """Whether or not to return the intermediate steps along with the final answer.""" return_direct: bool = False """Whether or not to return the result of querying the graph directly.""" extra_instructions: Optional[str] = None """Extra instructions by the appended to the query generation prompt.""" @property def input_keys(self) -> List[str]: return [self.input_key] @property def output_keys(self) -> List[str]: _output_keys = [self.output_key] return _output_keys @classmethod def from_llm( cls, llm: BaseLanguageModel, *, qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT, sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT, examples: Optional[str] = None, **kwargs: Any, ) -> NeptuneSparqlQAChain: """Initialize from LLM.""" qa_chain = LLMChain(llm=llm, prompt=qa_prompt) template_to_use = SPARQL_GENERATION_TEMPLATE if examples: template_to_use = template_to_use.replace( "Examples:", "Examples: " + examples ) sparql_prompt = PromptTemplate( input_variables=["schema", "prompt"], template=template_to_use ) sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt) return cls( # type: ignore[call-arg] qa_chain=qa_chain, sparql_generation_chain=sparql_generation_chain, examples=examples, **kwargs, ) def _call( self, inputs: Dict[str, Any], run_manager: Optional[CallbackManagerForChainRun] = None, ) -> Dict[str, str]: """ Generate SPARQL query, use it to retrieve a response from the gdb and answer the question. """ _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() callbacks = _run_manager.get_child() prompt = inputs[self.input_key] intermediate_steps: List = [] generated_sparql = self.sparql_generation_chain.run( {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks ) # Extract SPARQL generated_sparql = extract_sparql(generated_sparql) _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose) _run_manager.on_text( generated_sparql, color="green", end="\n", verbose=self.verbose ) intermediate_steps.append({"query": generated_sparql}) context = self.graph.query(generated_sparql) if self.return_direct: final_result = context else: _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) _run_manager.on_text( str(context), color="green", end="\n", verbose=self.verbose ) intermediate_steps.append({"context": context}) result = self.qa_chain( {"prompt": prompt, "context": context}, callbacks=callbacks, ) final_result = result[self.qa_chain.output_key] chain_result: Dict[str, Any] = {self.output_key: final_result} if self.return_intermediate_steps: chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps return chain_result