mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 11:02:37 +00:00
community: Make doctran synchronous (#15264)
### Description
I found that the methods in [the doctran
library](https://github.com/psychic-api/doctran) have been restructured
into [synchronous
versions](14944a59f7),
and [the example
notebook](https://github.com/psychic-api/doctran/blob/main/examples.ipynb)
also shows that the code is now synchronous, but the README has not been
updated yet.
So we need to modify the code to call doctran synchronously and update the documentation accordingly.
### Issue
https://github.com/langchain-ai/langchain/issues/14645
This commit is contained in:
@@ -63,12 +63,12 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
||||
"openai_api_model", "OPENAI_API_MODEL"
|
||||
)
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Extracts properties from text documents using doctran."""
|
||||
@@ -85,7 +85,7 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
||||
properties = [ExtractProperty(**property) for property in self.properties]
|
||||
for d in documents:
|
||||
doctran_doc = (
|
||||
await doctran.parse(content=d.page_content)
|
||||
doctran.parse(content=d.page_content)
|
||||
.extract(properties=properties)
|
||||
.execute()
|
||||
)
|
||||
|
@@ -33,12 +33,12 @@ class DoctranQATransformer(BaseDocumentTransformer):
|
||||
"openai_api_model", "OPENAI_API_MODEL"
|
||||
)
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Extracts QA from text documents using doctran."""
|
||||
@@ -53,9 +53,7 @@ class DoctranQATransformer(BaseDocumentTransformer):
|
||||
"Install doctran to use this parser. (pip install doctran)"
|
||||
)
|
||||
for d in documents:
|
||||
doctran_doc = (
|
||||
await doctran.parse(content=d.page_content).interrogate().execute()
|
||||
)
|
||||
doctran_doc = doctran.parse(content=d.page_content).interrogate().execute()
|
||||
questions_and_answers = doctran_doc.extracted_properties.get(
|
||||
"questions_and_answers"
|
||||
)
|
||||
|
@@ -36,12 +36,12 @@ class DoctranTextTranslator(BaseDocumentTransformer):
|
||||
)
|
||||
self.language = language
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Translates text documents using doctran."""
|
||||
@@ -60,7 +60,7 @@ class DoctranTextTranslator(BaseDocumentTransformer):
|
||||
for doc in documents
|
||||
]
|
||||
for i, doc in enumerate(doctran_docs):
|
||||
doctran_docs[i] = await doc.translate(language=self.language).execute()
|
||||
doctran_docs[i] = doc.translate(language=self.language).execute()
|
||||
return [
|
||||
Document(page_content=doc.transformed_content, metadata=doc.metadata)
|
||||
for doc in doctran_docs
|
||||
|
Reference in New Issue
Block a user