From 8799b028a6fa09dfefda92ea9a8bfc883c4e1aaf Mon Sep 17 00:00:00 2001
From: Mohammed Naqi <60170196+CsEnox@users.noreply.github.com>
Date: Tue, 16 Jan 2024 00:09:25 +0530
Subject: [PATCH] community[minor]: Adding asynchronous function implementation
 for Doctran (#15941)

## Description
This update adds the missing implementation of
`atransform_documents`, the asynchronous counterpart of
`transform_documents` in the Doctran property extractor.

### Usage Example:
```py
# Instantiate DoctranPropertyExtractor with specified properties
property_extractor = DoctranPropertyExtractor(properties=properties)

# Asynchronously extract properties from a list of documents
extracted_document = await property_extractor.atransform_documents(
    documents, properties=properties
)

# Display metadata of the first extracted document
print(json.dumps(extracted_document[0].metadata, indent=2))

```

## Issue
- Pull request #14525 broke the code shown above.
Rather than removing the asynchronous implementation of a function,
a synchronous version should be implemented alongside it.
---
 .../doctran_text_extract.py                   | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/document_transformers/doctran_text_extract.py b/libs/community/langchain_community/document_transformers/doctran_text_extract.py
index eee109193ee..e942eafdde8 100644
--- a/libs/community/langchain_community/document_transformers/doctran_text_extract.py
+++ b/libs/community/langchain_community/document_transformers/doctran_text_extract.py
@@ -66,7 +66,27 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
     async def atransform_documents(
         self, documents: Sequence[Document], **kwargs: Any
     ) -> Sequence[Document]:
-        raise NotImplementedError
+        """Extract properties from each document via doctran; mutates metadata."""
+        try:
+            from doctran import Doctran, ExtractProperty
+
+            doctran = Doctran(
+                openai_api_key=self.openai_api_key, openai_model=self.openai_api_model
+            )
+        except ImportError:
+            raise ImportError(
+                "Install doctran to use this parser. (pip install doctran)"
+            )
+        properties = [ExtractProperty(**property) for property in self.properties]
+        for d in documents:
+            doctran_doc = (
+                doctran.parse(content=d.page_content)
+                .extract(properties=properties)
+                .execute()
+            )
+
+            d.metadata["extracted_properties"] = doctran_doc.extracted_properties
+        return documents
 
     def transform_documents(
         self, documents: Sequence[Document], **kwargs: Any