community(doc_loaders): allow any credential type in AzureAIDocumentI… (#29289)

allow any credential type in AzureAIDocumentInteligence, not only
`api_key`.
This allows to use any of the credentials types integrated with AD.

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Adrián Panella 2025-01-27 14:56:30 -06:00 committed by GitHub
parent f00c66cc1f
commit 1551d9750c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 39 additions and 5 deletions

View File

@ -1,4 +1,6 @@
from typing import Iterator, List, Optional
from __future__ import annotations
from typing import TYPE_CHECKING, Iterator, List, Optional
from langchain_core.documents import Document
@ -8,6 +10,9 @@ from langchain_community.document_loaders.parsers import (
AzureAIDocumentIntelligenceParser,
)
if TYPE_CHECKING:
from azure.core.credentials import TokenCredential
class AzureAIDocumentIntelligenceLoader(BaseLoader):
"""Load a PDF with Azure Document Intelligence."""
@ -15,7 +20,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
file_path: Optional[str] = None,
url_path: Optional[str] = None,
bytes_source: Optional[bytes] = None,
@ -24,6 +29,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
mode: str = "markdown",
*,
analysis_features: Optional[List[str]] = None,
azure_credential: Optional["TokenCredential"] = None,
) -> None:
"""
Initialize the object for file processing with Azure Document Intelligence
@ -63,6 +69,9 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
List of optional analysis features, each feature should be passed
as a str that conforms to the enum `DocumentAnalysisFeature` in
`azure-ai-documentintelligence` package. Default value is None.
azure_credential: Optional[TokenCredential]
The credentials to use for DocumentIntelligenceClient construction, when
using credentials other than api_key (like AD).
Examples:
---------
@ -79,6 +88,15 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
assert (
file_path is not None or url_path is not None or bytes_source is not None
), "file_path, url_path or bytes_source must be provided"
assert (
api_key is not None or azure_credential is not None
), "Either api_key or azure_credential must be provided."
assert (
api_key is None or azure_credential is None
), "Only one of api_key or azure_credential should be provided."
self.file_path = file_path
self.url_path = url_path
self.bytes_source = bytes_source
@ -90,6 +108,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
api_model=api_model,
mode=mode,
analysis_features=analysis_features,
azure_credential=azure_credential,
)
def lazy_load(

View File

@ -1,11 +1,16 @@
from __future__ import annotations
import logging
from typing import Any, Iterator, List, Optional
from typing import TYPE_CHECKING, Any, Iterator, List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
if TYPE_CHECKING:
from azure.core.credentials import TokenCredential
logger = logging.getLogger(__name__)
@ -16,17 +21,27 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
def __init__(
self,
api_endpoint: str,
api_key: str,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
api_model: str = "prebuilt-layout",
mode: str = "markdown",
analysis_features: Optional[List[str]] = None,
azure_credential: Optional["TokenCredential"] = None,
):
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature
from azure.core.credentials import AzureKeyCredential
kwargs = {}
if api_key is None and azure_credential is None:
raise ValueError("Either api_key or azure_credential must be provided.")
if api_key and azure_credential:
raise ValueError(
"Only one of api_key or azure_credential should be provided."
)
if api_version is not None:
kwargs["api_version"] = api_version
@ -49,7 +64,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
self.client = DocumentIntelligenceClient(
endpoint=api_endpoint,
credential=AzureKeyCredential(api_key),
credential=azure_credential or AzureKeyCredential(api_key),
headers={"x-ms-useragent": "langchain-parser/1.0.0"},
features=analysis_features,
**kwargs,