mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-31 16:08:59 +00:00 
			
		
		
		
	Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
		
			
				
	
	
		
			298 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			298 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| from typing import (
 | |
|     TYPE_CHECKING,
 | |
|     List,
 | |
|     Optional,
 | |
| )
 | |
| 
 | |
| if TYPE_CHECKING:
 | |
|     import rdflib
 | |
| 
 | |
# SPARQL PREFIX declarations, keyed by the short namespace name.
prefixes = {
    "owl": "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
    "rdf": "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
    "rdfs": "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "xsd": "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
}


def _schema_query(prefix_keys: List[str], var: str, patterns: List[str]) -> str:
    """Assemble one schema-introspection SELECT query.

    :param prefix_keys: keys of ``prefixes`` to prepend, in order
    :param var: name of the selected variable (without the leading ``?``)
    :param patterns: graph patterns / filters placed inside the WHERE clause
    :return: the query text; every query also selects the optional
        ``rdfs:comment`` of ``?var`` as ``?com``
    """
    parts = [prefixes[key] for key in prefix_keys]
    parts.append(f"SELECT DISTINCT ?{var} ?com\n")
    parts.append("WHERE { \n")
    parts.extend(f"    {pattern} . \n" for pattern in patterns)
    parts.append(f"    OPTIONAL {{ ?{var} rdfs:comment ?com }} \n")
    parts.append("}")
    return "".join(parts)


# Classes: every type that some instance is declared to have.
cls_query_rdf = _schema_query(["rdfs"], "cls", ["?instance a ?cls"])

# Classes: as above, plus everything reachable through rdfs:subClassOf.
cls_query_rdfs = _schema_query(
    ["rdfs"], "cls", ["?instance a/rdfs:subClassOf* ?cls"]
)

# Classes: as for RDFS, but restricted to IRIs.
cls_query_owl = _schema_query(
    ["rdfs"],
    "cls",
    ["?instance a/rdfs:subClassOf* ?cls", "FILTER (isIRI(?cls))"],
)

# Relationships: every predicate that occurs in some triple.
rel_query_rdf = _schema_query(["rdfs"], "rel", ["?subj ?rel ?obj"])

# Relationships: rdf:Property instances, including sub-properties.
rel_query_rdfs = _schema_query(
    ["rdf", "rdfs"], "rel", ["?rel a/rdfs:subPropertyOf* rdf:Property"]
)

# OWL object properties, including sub-properties.
op_query_owl = _schema_query(
    ["rdfs", "owl"], "op", ["?op a/rdfs:subPropertyOf* owl:ObjectProperty"]
)

# OWL datatype properties, including sub-properties.
dp_query_owl = _schema_query(
    ["rdfs", "owl"], "dp", ["?dp a/rdfs:subPropertyOf* owl:DatatypeProperty"]
)
 | |
| 
 | |
| 
 | |
class RdfGraph:
    """RDFlib wrapper for graph operations.

    Modes:
    * local: Local file - can be queried and changed
    * online: Online file - can only be queried, changes can be stored locally
    * store: Triple store - can be queried and changed if update_endpoint available
    Together with a source file, the serialization should be specified.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        source_file: Optional[str] = None,
        serialization: Optional[str] = "ttl",
        query_endpoint: Optional[str] = None,
        update_endpoint: Optional[str] = None,
        standard: Optional[str] = "rdf",
        local_copy: Optional[str] = None,
    ) -> None:
        """
        Set up the RDFlib graph

        :param source_file: either a path for a local file or a URL
        :param serialization: serialization of the input
        :param query_endpoint: SPARQL endpoint for queries, read access
        :param update_endpoint: SPARQL endpoint for UPDATE queries, write access
        :param standard: RDF, RDFS, or OWL
        :param local_copy: new local copy for storing changes
        :raises ValueError: if rdflib is not installed, if ``standard`` is not
            supported, or if the combination of ``source_file`` and endpoints
            is ambiguous
        :raises AssertionError: if the loaded graph contains no triples
        """
        self.source_file = source_file
        self.serialization = serialization
        self.query_endpoint = query_endpoint
        self.update_endpoint = update_endpoint
        self.standard = standard
        self.local_copy = local_copy

        try:
            import rdflib
            from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as default
            from rdflib.plugins.stores import sparqlstore
        except ImportError as e:
            # ValueError is kept for backwards compatibility with callers;
            # the original ImportError stays attached as the cause.
            raise ValueError(
                "Could not import rdflib python package. "
                "Please install it with `pip install rdflib`."
            ) from e
        if self.standard not in (supported_standards := ("rdf", "rdfs", "owl")):
            raise ValueError(
                f"Invalid standard. Supported standards are: {supported_standards}."
            )

        # Exactly one data source is allowed: either a file (and no endpoints)
        # or a query endpoint (optionally paired with an update endpoint).
        if (not source_file and not query_endpoint) or (
            source_file and (query_endpoint or update_endpoint)
        ):
            raise ValueError(
                "Could not unambiguously initialize the graph wrapper. "
                "Specify either a file (local or online) via the source_file "
                "or a triple store via the endpoints."
            )

        if source_file:
            # Match the URL scheme, not just the "http" prefix, so that a
            # local file such as "http_dump.ttl" is not mistaken for a URL.
            if source_file.startswith(("http://", "https://")):
                self.mode = "online"
            else:
                self.mode = "local"
                # Changes to a local file are written back to it by default.
                if self.local_copy is None:
                    self.local_copy = self.source_file
            self.graph = rdflib.Graph()
            self.graph.parse(source_file, format=self.serialization)

        if query_endpoint:
            self.mode = "store"
            if not update_endpoint:
                self._store = sparqlstore.SPARQLStore()
                self._store.open(query_endpoint)
            else:
                self._store = sparqlstore.SPARQLUpdateStore()
                self._store.open((query_endpoint, update_endpoint))
            self.graph = rdflib.Graph(self._store, identifier=default)

        # Verify that the graph was loaded
        if not len(self.graph):
            raise AssertionError("The graph is empty.")

        # Set schema
        self.schema = ""
        self.load_schema()

    @property
    def get_schema(self) -> str:
        """
        Returns the schema of the graph database.
        """
        return self.schema

    def query(
        self,
        query: str,
    ) -> List[rdflib.query.ResultRow]:
        """
        Query the graph.

        :param query: SPARQL query text
        :return: the result rows; results that are not ``ResultRow`` instances
            are dropped
        :raises ValueError: if rdflib cannot parse the query
        """
        from rdflib.exceptions import ParserError
        from rdflib.query import ResultRow

        try:
            res = self.graph.query(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        return [r for r in res if isinstance(r, ResultRow)]

    def update(
        self,
        query: str,
    ) -> None:
        """
        Update the graph.

        The update is applied to the graph first. If a local copy is
        configured, the graph is then serialized to it, using the file
        extension as the serialization format.

        :param query: SPARQL UPDATE text
        :raises ValueError: if rdflib cannot parse the statement, or if a
            file-backed graph (local/online mode) has no local copy to save to
        """
        from rdflib.exceptions import ParserError

        try:
            self.graph.update(query)
        except ParserError as e:
            raise ValueError("Generated SPARQL statement is invalid\n" f"{e}") from e
        if self.local_copy:
            self.graph.serialize(
                destination=self.local_copy, format=self.local_copy.split(".")[-1]
            )
        elif self.mode != "store":
            # Only file-backed graphs need a target file. In store mode the
            # change has already been sent to the update endpoint, so there
            # is nothing left to save and no reason to fail.
            raise ValueError("No target file specified for saving the updated file.")

    @staticmethod
    def _get_local_name(iri: str) -> str:
        """
        Return the part of ``iri`` after the last ``#`` or, failing that,
        after the last ``/``.

        :raises ValueError: if ``iri`` contains neither ``#`` nor ``/``
        """
        if "#" in iri:
            local_name = iri.split("#")[-1]
        elif "/" in iri:
            local_name = iri.split("/")[-1]
        else:
            raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.")
        return local_name

    def _res_to_str(self, res: rdflib.query.ResultRow, var: str) -> str:
        """
        Render one schema result row as ``<IRI> (local name, description)``.

        The description comes from the row's ``com`` binding and is left out
        when that binding is ``None`` (no comment was found), instead of
        printing the literal text "None".

        :param res: result row holding the bindings ``var`` and ``com``
        :param var: name of the binding that holds the IRI
        """
        iri = res[var]
        details = [self._get_local_name(iri)]
        comment = res["com"]
        if comment is not None:
            details.append(str(comment))
        return "<" + str(iri) + "> (" + ", ".join(details) + ")"

    def load_schema(self) -> None:
        """
        Load the graph schema information.

        Runs the class/property queries that match ``self.standard`` and
        stores a textual summary in ``self.schema``.

        :raises ValueError: if ``self.standard`` is not rdf, rdfs or owl
        """

        def _rdf_s_schema(
            classes: List[rdflib.query.ResultRow],
            relationships: List[rdflib.query.ResultRow],
        ) -> str:
            # Shared layout for the RDF and RDFS summaries.
            return (
                "In the following, each IRI is followed by the local name and "
                "optionally its description in parentheses. \n"
                "The RDF graph supports the following node types:\n"
                f'{", ".join([self._res_to_str(r, "cls") for r in classes])}\n'
                "The RDF graph supports the following relationships:\n"
                f'{", ".join([self._res_to_str(r, "rel") for r in relationships])}\n'
            )

        if self.standard == "rdf":
            clss = self.query(cls_query_rdf)
            rels = self.query(rel_query_rdf)
            self.schema = _rdf_s_schema(clss, rels)
        elif self.standard == "rdfs":
            clss = self.query(cls_query_rdfs)
            rels = self.query(rel_query_rdfs)
            self.schema = _rdf_s_schema(clss, rels)
        elif self.standard == "owl":
            clss = self.query(cls_query_owl)
            ops = self.query(op_query_owl)
            dps = self.query(dp_query_owl)
            self.schema = (
                "In the following, each IRI is followed by the local name and "
                "optionally its description in parentheses. \n"
                "The OWL graph supports the following node types:\n"
                f'{", ".join([self._res_to_str(r, "cls") for r in clss])}\n'
                "The OWL graph supports the following object properties, "
                "i.e., relationships between objects:\n"
                f'{", ".join([self._res_to_str(r, "op") for r in ops])}\n'
                "The OWL graph supports the following data properties, "
                "i.e., relationships between objects and literals:\n"
                f'{", ".join([self._res_to_str(r, "dp") for r in dps])}\n'
            )
        else:
            raise ValueError(f"Mode '{self.standard}' is currently not supported.")
 |