From 1fe10a3e3d70c4a3a21dcd199170011bda960ce8 Mon Sep 17 00:00:00 2001 From: Juan Jose Miguel Ovalle Villamil <70274018+jjovalle99@users.noreply.github.com> Date: Wed, 27 Mar 2024 01:40:21 -0500 Subject: [PATCH] experimental[patch]: Enhance LLMGraphTransformer with async processing and improved readability (#19205) - [x] **PR title**: "experimental: Enhance LLMGraphTransformer with async processing and improved readability" - [x] **PR message**: - **Description:** This pull request refactors the `process_response` and `convert_to_graph_documents` methods in the LLMGraphTransformer class to improve code readability and adds async versions of these methods for concurrent processing. The main changes include: - Simplifying list comprehensions and conditional logic in the process_response method for better readability. - Adding async versions aprocess_response and aconvert_to_graph_documents to enable concurrent processing of documents. These enhancements aim to improve the overall efficiency and maintainability of the `LLMGraphTransformer` class. - **Issue:** N/A - **Dependencies:** No additional dependencies required. - **Twitter handle:** @jjovalle99 - [x] **Add tests and docs**: N/A (This PR does not introduce a new integration) - [x] **Lint and test**: Ran make format, make lint, and make test from the root of the modified package(s). All tests pass successfully. Additional notes: - The changes made in this PR are backwards compatible and do not introduce any breaking changes. - The PR touches only the `LLMGraphTransformer` class within the experimental package. --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> --- .../graph_transformers/llm.py | 93 +++++++++++++------ 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/libs/experimental/langchain_experimental/graph_transformers/llm.py b/libs/experimental/langchain_experimental/graph_transformers/llm.py index 34ff03a0099..34c069b772e 100644 --- a/libs/experimental/langchain_experimental/graph_transformers/llm.py +++ b/libs/experimental/langchain_experimental/graph_transformers/llm.py @@ -1,3 +1,4 @@ +import asyncio from typing import Any, List, Optional, Sequence from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship @@ -207,29 +208,20 @@ class LLMGraphTransformer: """ text = document.page_content raw_schema = self.chain.invoke({"input": text}) - if raw_schema.nodes: - nodes = [map_to_base_node(node) for node in raw_schema.nodes] - else: - nodes = [] - if raw_schema.relationships: - relationships = [ - map_to_base_relationship(rel) for rel in raw_schema.relationships - ] - else: - relationships = [] + nodes = ( + [map_to_base_node(node) for node in raw_schema.nodes] + if raw_schema.nodes + else [] + ) + relationships = ( + [map_to_base_relationship(rel) for rel in raw_schema.relationships] + if raw_schema.relationships + else [] + ) # Strict mode filtering if self.strict_mode and (self.allowed_nodes or self.allowed_relationships): - if self.allowed_relationships and self.allowed_nodes: - nodes = [node for node in nodes if node.type in self.allowed_nodes] - relationships = [ - rel - for rel in relationships - if rel.type in self.allowed_relationships - and rel.source.type in self.allowed_nodes - and rel.target.type in self.allowed_nodes - ] - elif self.allowed_nodes and not self.allowed_relationships: + if self.allowed_nodes: nodes = [node for node in nodes if node.type in self.allowed_nodes] relationships = [ rel @@ -237,17 +229,14 @@ class LLMGraphTransformer: if rel.source.type in self.allowed_nodes and rel.target.type in self.allowed_nodes ] - if self.allowed_relationships and not self.allowed_nodes: + if self.allowed_relationships: relationships = [ rel for rel in relationships if rel.type in self.allowed_relationships ] - graph_document = GraphDocument( - nodes=nodes, relationships=relationships, source=document - ) - return graph_document + return GraphDocument(nodes=nodes, relationships=relationships, source=document) def convert_to_graph_documents( self, documents: Sequence[Document] @@ -261,8 +250,54 @@ class LLMGraphTransformer: Returns: Sequence[GraphDocument]: The transformed documents as graphs. """ - results = [] - for document in documents: - graph_document = self.process_response(document) - results.append(graph_document) + return [self.process_response(document) for document in documents] + + async def aprocess_response(self, document: Document) -> GraphDocument: + """ + Asynchronously processes a single document, transforming it into a + graph document. + """ + text = document.page_content + raw_schema = await self.chain.ainvoke({"input": text}) + + nodes = ( + [map_to_base_node(node) for node in raw_schema.nodes] + if raw_schema.nodes + else [] + ) + relationships = ( + [map_to_base_relationship(rel) for rel in raw_schema.relationships] + if raw_schema.relationships + else [] + ) + + if self.strict_mode and (self.allowed_nodes or self.allowed_relationships): + if self.allowed_nodes: + nodes = [node for node in nodes if node.type in self.allowed_nodes] + relationships = [ + rel + for rel in relationships + if rel.source.type in self.allowed_nodes + and rel.target.type in self.allowed_nodes + ] + if self.allowed_relationships: + relationships = [ + rel + for rel in relationships + if rel.type in self.allowed_relationships + ] + + return GraphDocument(nodes=nodes, relationships=relationships, source=document) + + async def aconvert_to_graph_documents( + self, documents: Sequence[Document] + ) -> List[GraphDocument]: + """ + Asynchronously convert a sequence of documents into graph documents. + """ + tasks = [ + asyncio.create_task(self.aprocess_response(document)) + for document in documents + ] + results = await asyncio.gather(*tasks) return results