community: Make doctran synchronous (#15264)

### Description

I found that the methods in [the doctran
library](https://github.com/psychic-api/doctran) have been restructured
into [synchronous
versions](14944a59f7).

[The example
ipynb](https://github.com/psychic-api/doctran/blob/main/examples.ipynb)
also shows that the API is now called synchronously, but the README has
not been updated yet.

So we need to modify the code and update the documentation to match.

### Issue

https://github.com/langchain-ai/langchain/issues/14645
This commit is contained in:
Bob Lin
2023-12-28 10:05:24 -06:00
committed by GitHub
parent 9a16590aa9
commit a464eb4394
6 changed files with 51 additions and 67 deletions

View File

@@ -63,12 +63,12 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
"openai_api_model", "OPENAI_API_MODEL"
)
def transform_documents(
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
async def atransform_documents(
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Extracts properties from text documents using doctran."""
@@ -85,7 +85,7 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
properties = [ExtractProperty(**property) for property in self.properties]
for d in documents:
doctran_doc = (
await doctran.parse(content=d.page_content)
doctran.parse(content=d.page_content)
.extract(properties=properties)
.execute()
)

View File

@@ -33,12 +33,12 @@ class DoctranQATransformer(BaseDocumentTransformer):
"openai_api_model", "OPENAI_API_MODEL"
)
def transform_documents(
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
async def atransform_documents(
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Extracts QA from text documents using doctran."""
@@ -53,9 +53,7 @@ class DoctranQATransformer(BaseDocumentTransformer):
"Install doctran to use this parser. (pip install doctran)"
)
for d in documents:
doctran_doc = (
await doctran.parse(content=d.page_content).interrogate().execute()
)
doctran_doc = doctran.parse(content=d.page_content).interrogate().execute()
questions_and_answers = doctran_doc.extracted_properties.get(
"questions_and_answers"
)

View File

@@ -36,12 +36,12 @@ class DoctranTextTranslator(BaseDocumentTransformer):
)
self.language = language
def transform_documents(
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
async def atransform_documents(
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Translates text documents using doctran."""
@@ -60,7 +60,7 @@ class DoctranTextTranslator(BaseDocumentTransformer):
for doc in documents
]
for i, doc in enumerate(doctran_docs):
doctran_docs[i] = await doc.translate(language=self.language).execute()
doctran_docs[i] = doc.translate(language=self.language).execute()
return [
Document(page_content=doc.transformed_content, metadata=doc.metadata)
for doc in doctran_docs