mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-07 20:15:40 +00:00
community[minor]: Adding asynchronous function implementation for Doctran (#15941)
## Description In this update, I added the missing implementation of atransform_documents, the asynchronous counterpart of transform_documents in Doctran. ### Usage Example: ```py # Instantiate DoctranPropertyExtractor with specified properties property_extractor = DoctranPropertyExtractor(properties=properties) # Asynchronously extract properties from a list of documents extracted_document = await property_extractor.atransform_documents( documents, properties=properties ) # Display metadata of the first extracted document print(json.dumps(extracted_document[0].metadata, indent=2)) ``` ## Issue - Pull request #14525 broke the code shown above. Instead of removing the asynchronous implementation of a function, consider implementing a synchronous version alongside it.
This commit is contained in:
parent
fb7e66b809
commit
8799b028a6
@ -66,7 +66,27 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
||||
async def atransform_documents(
    self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
    """Extract properties from text documents using doctran.

    Each document's ``page_content`` is parsed with doctran, the properties
    configured in ``self.properties`` are extracted, and the result is
    stored under ``metadata["extracted_properties"]`` of that document.

    Args:
        documents: Documents to process. Their ``metadata`` dicts are
            updated in place.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The same ``documents`` sequence that was passed in, after each
        document's metadata has been updated.

    Raises:
        ImportError: If the ``doctran`` package is not installed.
    """
    # Keep the try body limited to the import so that only a missing
    # package is reported as "install doctran"; chain the original error
    # so the underlying cause stays visible in the traceback.
    try:
        from doctran import Doctran, ExtractProperty
    except ImportError as e:
        raise ImportError(
            "Install doctran to use this parser. (pip install doctran)"
        ) from e

    doctran = Doctran(
        openai_api_key=self.openai_api_key, openai_model=self.openai_api_model
    )
    # Build the ExtractProperty objects once; they are reused for every
    # document in the loop below.
    properties = [ExtractProperty(**prop) for prop in self.properties]
    for d in documents:
        # NOTE(review): this call chain is not awaited. If ``execute()``
        # returns a coroutine in the installed doctran version it must be
        # awaited here -- confirm against the doctran release in use.
        doctran_doc = (
            doctran.parse(content=d.page_content)
            .extract(properties=properties)
            .execute()
        )
        d.metadata["extracted_properties"] = doctran_doc.extracted_properties
    return documents
|
||||
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
|
Loading…
Reference in New Issue
Block a user