mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 12:59:07 +00:00
community: bytes as a source to AzureAIDocumentIntelligenceLoader
(#26618)
- **Description:** This PR adds functionality to pass in in-memory bytes as a source to `AzureAIDocumentIntelligenceLoader`. - **Issue:** I needed the functionality, so I added it. - **Dependencies:** NA - **Twitter handle:** @akseljoonas if this is a big enough change :) --------- Co-authored-by: Aksel Joonas Reedi <aksel@klippa.com> Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
committed by
GitHub
parent
7a9149f5dd
commit
2cb39270ec
@@ -109,3 +109,21 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
|
||||
yield from self._generate_docs_page(result)
|
||||
else:
|
||||
raise ValueError(f"Invalid mode: {self.mode}")
|
||||
|
||||
def parse_bytes(self, bytes_source: bytes) -> Iterator[Document]:
    """Analyze an in-memory document and lazily yield the resulting documents.

    The raw bytes are submitted through ``self.client.begin_analyze_document``
    using the model named by ``self.api_model``. Markdown output is requested
    when ``self.mode`` is ``"markdown"``; every other mode requests plain text.

    Args:
        bytes_source: Raw content of the document to analyze.

    Yields:
        The documents produced by ``self._generate_docs_single`` for the
        ``"single"`` and ``"markdown"`` modes, or by
        ``self._generate_docs_page`` for the ``"page"`` mode.

    Raises:
        ValueError: If ``self.mode`` is not ``"single"``, ``"markdown"`` or
            ``"page"``. The check runs before any request is sent.
    """
    # Resolve the generator first so a misconfigured mode fails fast instead
    # of after a complete (and billable) remote analysis round trip.
    if self.mode in ("single", "markdown"):
        generate_docs = self._generate_docs_single
    elif self.mode == "page":
        generate_docs = self._generate_docs_page
    else:
        raise ValueError(f"Invalid mode: {self.mode}")

    # Imported lazily so the Azure SDK is only needed when parsing happens.
    from azure.ai.documentintelligence.models import AnalyzeDocumentRequest

    poller = self.client.begin_analyze_document(
        self.api_model,
        analyze_request=AnalyzeDocumentRequest(bytes_source=bytes_source),
        output_content_format="markdown" if self.mode == "markdown" else "text",
    )

    # ``result()`` blocks until the long-running analysis has finished.
    yield from generate_docs(poller.result())
|
||||
|
Reference in New Issue
Block a user