community: bytes as a source to AzureAIDocumentIntelligenceLoader (#26618)

- **Description:** This PR adds the ability to pass in-memory bytes as a source to `AzureAIDocumentIntelligenceLoader`.
- **Issue:** I needed the functionality, so I added it.
- **Dependencies:** NA
- **Twitter handle:** @akseljoonas if this is a big enough change :)

---------

Co-authored-by: Aksel Joonas Reedi <aksel@klippa.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-12 12:59:07 +00:00 · 2024-11-07 04:40:21 +01:00
parent 7a9149f5dd
commit 2cb39270ec
2 changed files with 33 additions and 6 deletions
--- a/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py
+++ b/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py
@@ -109,3 +109,21 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
            yield from self._generate_docs_page(result)
        else:
            raise ValueError(f"Invalid mode: {self.mode}")
+
+    def parse_bytes(self, bytes_source: bytes) -> Iterator[Document]:
+        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
+
+        poller = self.client.begin_analyze_document(
+            self.api_model,
+            analyze_request=AnalyzeDocumentRequest(bytes_source=bytes_source),
+            # content_type="application/octet-stream",
+            output_content_format="markdown" if self.mode == "markdown" else "text",
+        )
+        result = poller.result()
+
+        if self.mode in ["single", "markdown"]:
+            yield from self._generate_docs_single(result)
+        elif self.mode in ["page"]:
+            yield from self._generate_docs_page(result)
+        else:
+            raise ValueError(f"Invalid mode: {self.mode}")