community[patch]: Microsoft Azure Document Intelligence updates (#16932)

- **Description:** Update the Azure Document Intelligence implementation (by
the Microsoft team) and the RAG cookbook with Azure AI Search

---------

Co-authored-by: Lu Zhang (AI) <luzhan@microsoft.com>
Co-authored-by: Yateng Hong <yatengh@microsoft.com>
Co-authored-by: teethache <hongyateng2006@126.com>
Co-authored-by: Lu Zhang <44625949+luzhang06@users.noreply.github.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
Fabrizio Ruocco
2024-03-27 06:36:59 +00:00
committed by GitHub
parent cd79305eb9
commit f12cb0bea4
12 changed files with 708 additions and 71 deletions

View File

@@ -20,8 +20,44 @@ def test_doc_intelligence(mock_credential: MagicMock, mock_client: MagicMock) ->
mock_client.assert_called_once_with(
endpoint=endpoint,
credential=mock_credential(),
headers={"x-ms-useragent": "langchain-parser/1.0.0"},
headers={
"x-ms-useragent": "langchain-parser/1.0.0",
},
features=None,
)
assert parser.client == mock_client()
assert parser.api_model == "prebuilt-layout"
assert parser.mode == "markdown"
@pytest.mark.requires("azure", "azure.ai", "azure.ai.documentintelligence")
@patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
@patch("azure.core.credentials.AzureKeyCredential")
def test_doc_intelligence_with_analysis_features(
    mock_credential: MagicMock, mock_client: MagicMock
) -> None:
    """Check how ``analysis_features`` is handled by the parser constructor.

    A parser built with a valid feature list must construct the mocked
    client with that same list as ``features``; building one with the
    feature list ``["invalid"]`` must raise ``ValueError``.
    """
    api_endpoint, api_key = "endpoint", "key"
    valid_features = ["ocrHighResolution", "barcodes"]
    rejected_features = ["invalid"]
    expected_headers = {"x-ms-useragent": "langchain-parser/1.0.0"}

    parser = AzureAIDocumentIntelligenceParser(
        api_endpoint=api_endpoint,
        api_key=api_key,
        analysis_features=valid_features,
    )

    # The credential mock must have been called exactly once, with the key.
    mock_credential.assert_called_once_with(api_key)
    # The client mock must have received the feature list unchanged.
    mock_client.assert_called_once_with(
        endpoint=api_endpoint,
        credential=mock_credential(),
        headers=expected_headers,
        features=valid_features,
    )

    # api_model and mode were not passed above, so these are the values the
    # parser is expected to fall back to.
    assert parser.client == mock_client()
    assert parser.api_model == "prebuilt-layout"
    assert parser.mode == "markdown"

    # Only the raising call lives inside the context manager; its result is
    # not bound because construction is expected to fail.
    with pytest.raises(ValueError):
        AzureAIDocumentIntelligenceParser(
            api_endpoint=api_endpoint,
            api_key=api_key,
            analysis_features=rejected_features,
        )