From 68d16d8a07a52072513a4937abc9e97b11c5faf0 Mon Sep 17 00:00:00 2001 From: Paul Czarkowski Date: Thu, 10 Apr 2025 22:22:58 -0400 Subject: [PATCH] Community: Add Managed Identity support for Azure AI Search (#30730) Add Managed Identity support for Azure AI Search --------- Signed-off-by: Paul Czarkowski --- .../retrievers/azure_ai_search.ipynb | 34 ++++++++++++++++++- .../retrievers/azure_ai_search.py | 33 +++++++++++++++--- .../retrievers/test_azure_ai_search.py | 6 ++-- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/docs/docs/integrations/retrievers/azure_ai_search.ipynb b/docs/docs/integrations/retrievers/azure_ai_search.ipynb index e5a9bcec2a1..f2175a8ca75 100644 --- a/docs/docs/integrations/retrievers/azure_ai_search.ipynb +++ b/docs/docs/integrations/retrievers/azure_ai_search.ipynb @@ -40,11 +40,21 @@ "\n", "+ An existing index with vector fields. There are several ways to create one, including using the [vector store module](../vectorstores/azuresearch.ipynb). Or, [try the Azure AI Search REST APIs](https://learn.microsoft.com/azure/search/search-get-started-vector).\n", "\n", - "+ An API key. API keys are generated when you create the search service. If you're just querying an index, you can use the query API key, otherwise use an admin API key. See [Find your API keys](https://learn.microsoft.com/azure/search/search-security-api-keys?tabs=rest-use%2Cportal-find%2Cportal-query#find-existing-keys) for details.\n", + "+ An API key or Azure AD Token.\n", + " + API keys are generated when you create the search service. If you're just querying an index, you can use the query API key, otherwise use an admin API key. See [Find your API keys](https://learn.microsoft.com/azure/search/search-security-api-keys?tabs=rest-use%2Cportal-find%2Cportal-query#find-existing-keys) for details.\n", + " + Azure AD Token can be used with Azure Managed Identity. 
See [Connect your app to Azure AI Search using identities](https://learn.microsoft.com/en-us/azure/search/keyless-connections?tabs=python%2Cazure-cli) for details.\n", "\n", "We can then set the search service name, index name, and API key as environment variables (alternatively, you can pass them as arguments to `AzureAISearchRetriever`). The search index provides the searchable content." ] }, + { + "cell_type": "markdown", + "id": "2af9655d", + "metadata": {}, + "source": [ + "With an API Key" + ] + }, { "cell_type": "code", "execution_count": null, @@ -59,6 +69,28 @@ "os.environ[\"AZURE_AI_SEARCH_API_KEY\"] = \"\"" ] }, + { + "cell_type": "markdown", + "id": "35ee4780", + "metadata": {}, + "source": [ + "With an Azure AD Token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f33263c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"AZURE_AI_SEARCH_SERVICE_NAME\"] = \"\"\n", + "os.environ[\"AZURE_AI_SEARCH_INDEX_NAME\"] = \"\"\n", + "os.environ[\"AZURE_AI_SEARCH_AD_TOKEN\"] = \"\"" + ] + }, { "cell_type": "markdown", "id": "3e635218-8634-4f39-abc5-39e319eeb136", diff --git a/libs/community/langchain_community/retrievers/azure_ai_search.py b/libs/community/langchain_community/retrievers/azure_ai_search.py index 01549ff3bd0..6db86a3c3bf 100644 --- a/libs/community/langchain_community/retrievers/azure_ai_search.py +++ b/libs/community/langchain_community/retrievers/azure_ai_search.py @@ -32,7 +32,10 @@ class AzureAISearchRetriever(BaseRetriever): pip install -U langchain-community azure-identity azure-search-documents export AZURE_AI_SEARCH_SERVICE_NAME="" export AZURE_AI_SEARCH_INDEX_NAME="" + export AZURE_AI_SEARCH_API_KEY="" + or + export AZURE_AI_SEARCH_AD_TOKEN="" Key init args: content_key: str @@ -96,6 +99,14 @@ class AzureAISearchRetriever(BaseRetriever): """API version""" aiosession: Optional[aiohttp.ClientSession] = None """ClientSession, in case we want to reuse connection for better 
performance.""" + azure_ad_token: str = "" + """Your Azure Active Directory token. + + Automatically inferred from env var `AZURE_AI_SEARCH_AD_TOKEN` if not provided. + + For more: + https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id. + """ content_key: str = "content" """Key in a retrieved result to set as the Document page_content.""" top_k: Optional[int] = None @@ -118,9 +129,19 @@ class AzureAISearchRetriever(BaseRetriever): values["index_name"] = get_from_dict_or_env( values, "index_name", "AZURE_AI_SEARCH_INDEX_NAME" ) - values["api_key"] = get_from_dict_or_env( - values, "api_key", "AZURE_AI_SEARCH_API_KEY" + values["azure_ad_token"] = get_from_dict_or_env( + values, "azure_ad_token", "AZURE_AI_SEARCH_AD_TOKEN", default="" ) + values["api_key"] = get_from_dict_or_env( + values, "api_key", "AZURE_AI_SEARCH_API_KEY", default="" + ) + if values["azure_ad_token"] == "" and values["api_key"] == "": + raise ValueError( + "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, " + "or the `AZURE_AI_SEARCH_API_KEY` or `AZURE_AI_SEARCH_AD_TOKEN` " + "environment variables." 
+ ) + return values def _build_search_url(self, query: str) -> str: @@ -145,10 +166,14 @@ class AzureAISearchRetriever(BaseRetriever): @property def _headers(self) -> Dict[str, str]: - return { + headers = { "Content-Type": "application/json", - "api-key": self.api_key, } + if self.azure_ad_token: + headers["Authorization"] = f"Bearer {self.azure_ad_token}" + else: + headers["api-key"] = f"{self.api_key}" + return headers def _search(self, query: str) -> List[dict]: search_url = self._build_search_url(query) diff --git a/libs/community/tests/integration_tests/retrievers/test_azure_ai_search.py b/libs/community/tests/integration_tests/retrievers/test_azure_ai_search.py index 27d6c3edd8c..4a831ba5a08 100644 --- a/libs/community/tests/integration_tests/retrievers/test_azure_ai_search.py +++ b/libs/community/tests/integration_tests/retrievers/test_azure_ai_search.py @@ -12,7 +12,8 @@ def test_azure_ai_search_invoke() -> None: """Test valid call to Azure AI Search. In order to run this test, you should provide - a `service_name`, azure search `api_key` and an `index_name` + a `service_name`, an 'index_name' and + an azure search `api_key` or 'azure_ad_token' as arguments for the AzureAISearchRetriever in both tests. api_version, aiosession and topk_k are optional parameters. """ @@ -32,7 +33,8 @@ async def test_azure_ai_search_ainvoke() -> None: """Test valid async call to Azure AI Search. In order to run this test, you should provide - a `service_name`, azure search `api_key` and an `index_name` + a `service_name`, an 'index_name' and + an azure search `api_key` or 'azure_ad_token' as arguments for the AzureAISearchRetriever. """ retriever = AzureAISearchRetriever()