community: Make doctran synchronous (#15264)

### Description

I found that the methods in [the doctran library](https://github.com/psychic-api/doctran) have been restructured into [synchronous versions](14944a59f7), and [the example ipynb](https://github.com/psychic-api/doctran/blob/main/examples.ipynb) also shows that the code is synchronous, but the README has not been updated yet. We therefore need to modify the code and update the documentation.

### Issue

https://github.com/langchain-ai/langchain/issues/14645
2025-09-01 11:02:37 +00:00 · 2023-12-28 10:05:24 -06:00
parent 9a16590aa9
commit a464eb4394
6 changed files with 51 additions and 67 deletions
--- a/libs/community/langchain_community/document_transformers/doctran_text_extract.py
+++ b/libs/community/langchain_community/document_transformers/doctran_text_extract.py
@@ -63,12 +63,12 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
            "openai_api_model", "OPENAI_API_MODEL"
        )

-    def transform_documents(
+    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        raise NotImplementedError

-    async def atransform_documents(
+    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Extracts properties from text documents using doctran."""
@@ -85,7 +85,7 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
        properties = [ExtractProperty(**property) for property in self.properties]
        for d in documents:
            doctran_doc = (
-                await doctran.parse(content=d.page_content)
+                doctran.parse(content=d.page_content)
                .extract(properties=properties)
                .execute()
            )
--- a/libs/community/langchain_community/document_transformers/doctran_text_qa.py
+++ b/libs/community/langchain_community/document_transformers/doctran_text_qa.py
@@ -33,12 +33,12 @@ class DoctranQATransformer(BaseDocumentTransformer):
            "openai_api_model", "OPENAI_API_MODEL"
        )

-    def transform_documents(
+    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        raise NotImplementedError

-    async def atransform_documents(
+    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Extracts QA from text documents using doctran."""
@@ -53,9 +53,7 @@ class DoctranQATransformer(BaseDocumentTransformer):
                "Install doctran to use this parser. (pip install doctran)"
            )
        for d in documents:
-            doctran_doc = (
-                await doctran.parse(content=d.page_content).interrogate().execute()
-            )
+            doctran_doc = doctran.parse(content=d.page_content).interrogate().execute()
            questions_and_answers = doctran_doc.extracted_properties.get(
                "questions_and_answers"
            )
--- a/libs/community/langchain_community/document_transformers/doctran_text_translate.py
+++ b/libs/community/langchain_community/document_transformers/doctran_text_translate.py
@@ -36,12 +36,12 @@ class DoctranTextTranslator(BaseDocumentTransformer):
        )
        self.language = language

-    def transform_documents(
+    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        raise NotImplementedError

-    async def atransform_documents(
+    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Translates text documents using doctran."""
@@ -60,7 +60,7 @@ class DoctranTextTranslator(BaseDocumentTransformer):
            for doc in documents
        ]
        for i, doc in enumerate(doctran_docs):
-            doctran_docs[i] = await doc.translate(language=self.language).execute()
+            doctran_docs[i] = doc.translate(language=self.language).execute()
        return [
            Document(page_content=doc.transformed_content, metadata=doc.metadata)
            for doc in doctran_docs