mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-08 04:25:46 +00:00
community[minor]: Adding asynchronous function implementation for Doctran (#15941)
## Description

In this update, I implemented the missing `atransform_documents` method, which is the asynchronous counterpart of `transform_documents` in Doctran.

### Usage Example:

```py
# Instantiate DoctranPropertyExtractor with specified properties
property_extractor = DoctranPropertyExtractor(properties=properties)

# Asynchronously extract properties from a list of documents
extracted_document = await property_extractor.atransform_documents(
    documents, properties=properties
)

# Display metadata of the first extracted document
print(json.dumps(extracted_document[0].metadata, indent=2))
```

## Issue

- Pull request #14525 broke the code shown above. Instead of removing the asynchronous implementation of a function, consider implementing a synchronous version alongside it.
This commit is contained in:
parent
fb7e66b809
commit
8799b028a6
@ -66,7 +66,27 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
|||||||
async def atransform_documents(
|
async def atransform_documents(
|
||||||
self, documents: Sequence[Document], **kwargs: Any
|
self, documents: Sequence[Document], **kwargs: Any
|
||||||
) -> Sequence[Document]:
|
) -> Sequence[Document]:
|
||||||
raise NotImplementedError
|
"""Extracts properties from text documents using doctran."""
|
||||||
|
try:
|
||||||
|
from doctran import Doctran, ExtractProperty
|
||||||
|
|
||||||
|
doctran = Doctran(
|
||||||
|
openai_api_key=self.openai_api_key, openai_model=self.openai_api_model
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(
|
||||||
|
"Install doctran to use this parser. (pip install doctran)"
|
||||||
|
)
|
||||||
|
properties = [ExtractProperty(**property) for property in self.properties]
|
||||||
|
for d in documents:
|
||||||
|
doctran_doc = (
|
||||||
|
doctran.parse(content=d.page_content)
|
||||||
|
.extract(properties=properties)
|
||||||
|
.execute()
|
||||||
|
)
|
||||||
|
|
||||||
|
d.metadata["extracted_properties"] = doctran_doc.extracted_properties
|
||||||
|
return documents
|
||||||
|
|
||||||
def transform_documents(
|
def transform_documents(
|
||||||
self, documents: Sequence[Document], **kwargs: Any
|
self, documents: Sequence[Document], **kwargs: Any
|
||||||
|
Loading…
Reference in New Issue
Block a user