mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 14:49:29 +00:00
community[patch]: Add Blended Search Support to GoogleVertexAISearchRetriever
(#19082)
Reference: https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
parent
0ddfe7fc9d
commit
cee03630d9
@ -30,7 +30,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-cloud-discoveryengine"
|
||||
"%pip install --upgrade --quiet google-cloud-discoveryengine"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -115,10 +115,12 @@
|
||||
" - `global` (default)\n",
|
||||
" - `us`\n",
|
||||
" - `eu`\n",
|
||||
"- `data_store_id` - The ID of the data store you want to use.\n",
|
||||
" - Note: This was called `search_engine_id` in previous versions of the retriever.\n",
|
||||
"\n",
|
||||
"The `project_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID` and `DATA_STORE_ID`.\n",
|
||||
"One of:\n",
|
||||
"- `search_engine_id` - The ID of the search app you want to use. (Required for Blended Search)\n",
|
||||
"- `data_store_id` - The ID of the data store you want to use.\n",
|
||||
"\n",
|
||||
"The `project_id`, `search_engine_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID`, `SEARCH_ENGINE_ID` and `DATA_STORE_ID`.\n",
|
||||
"\n",
|
||||
"You can also configure a number of optional parameters, including:\n",
|
||||
"\n",
|
||||
@ -137,17 +139,17 @@
|
||||
"- `engine_data_type` - Defines the Vertex AI Search data type\n",
|
||||
" - `0` - Unstructured data\n",
|
||||
" - `1` - Structured data\n",
|
||||
" - `2` - Website data with [Advanced Website Indexing](https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing)\n",
|
||||
" - `2` - Website data\n",
|
||||
" - `3` - [Blended search](https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores)\n",
|
||||
"\n",
|
||||
"### Migration guide for `GoogleCloudEnterpriseSearchRetriever`\n",
|
||||
"\n",
|
||||
"In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`. Some backwards-incompatible changes had to be made to the retriever after the General Availability launch due to changes in the product behavior.\n",
|
||||
"In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`.\n",
|
||||
"\n",
|
||||
"To update to the new retriever, make the following changes:\n",
|
||||
"\n",
|
||||
"- Change the import from: `from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever` -> `from langchain.retrievers import GoogleVertexAISearchRetriever`.\n",
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n",
|
||||
"- Upon class initialization, change the `search_engine_id` parameter name to `data_store_id`.\n"
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -170,6 +172,7 @@
|
||||
"\n",
|
||||
"PROJECT_ID = \"<YOUR PROJECT ID>\" # Set to your Project ID\n",
|
||||
"LOCATION_ID = \"<YOUR LOCATION>\" # Set to your data store location\n",
|
||||
"SEARCH_ENGINE_ID = \"<YOUR SEARCH APP ID>\" # Set to your search app ID\n",
|
||||
"DATA_STORE_ID = \"<YOUR DATA STORE ID>\" # Set to your data store ID"
|
||||
]
|
||||
},
|
||||
@ -281,6 +284,32 @@
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever for **blended** data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" search_engine_id=SEARCH_ENGINE_ID,\n",
|
||||
" max_documents=3,\n",
|
||||
" engine_data_type=3,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = retriever.get_relevant_documents(query)\n",
|
||||
"for doc in result:\n",
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -322,7 +351,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -25,8 +25,10 @@ if TYPE_CHECKING:
|
||||
class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
||||
project_id: str
|
||||
"""Google Cloud Project ID."""
|
||||
data_store_id: str
|
||||
data_store_id: Optional[str] = None
|
||||
"""Vertex AI Search data store ID."""
|
||||
search_engine_id: Optional[str] = None
|
||||
"""Vertex AI Search app ID."""
|
||||
location_id: str = "global"
|
||||
"""Vertex AI Search data store location."""
|
||||
serving_config_id: str = "default_config"
|
||||
@ -35,11 +37,12 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
||||
"""The default custom credentials (google.auth.credentials.Credentials) to use
|
||||
when making API calls. If not provided, credentials will be ascertained from
|
||||
the environment."""
|
||||
engine_data_type: int = Field(default=0, ge=0, le=2)
|
||||
""" Defines the Vertex AI Search data type
|
||||
engine_data_type: int = Field(default=0, ge=0, le=3)
|
||||
""" Defines the Vertex AI Search app data type
|
||||
0 - Unstructured data
|
||||
1 - Structured data
|
||||
2 - Website data
|
||||
3 - Blended search
|
||||
"""
|
||||
|
||||
@root_validator(pre=True)
|
||||
@ -51,7 +54,7 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
||||
raise ImportError(
|
||||
"google.cloud.discoveryengine is not installed."
|
||||
"Please install it with pip install "
|
||||
"google-cloud-discoveryengine>=0.11.0"
|
||||
"google-cloud-discoveryengine>=0.11.10"
|
||||
) from exc
|
||||
try:
|
||||
from google.api_core.exceptions import InvalidArgument # noqa: F401
|
||||
@ -64,26 +67,15 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
||||
values["project_id"] = get_from_dict_or_env(values, "project_id", "PROJECT_ID")
|
||||
|
||||
try:
|
||||
# For backwards compatibility
|
||||
search_engine_id = get_from_dict_or_env(
|
||||
values["data_store_id"] = get_from_dict_or_env(
|
||||
values, "data_store_id", "DATA_STORE_ID"
|
||||
)
|
||||
values["search_engine_id"] = get_from_dict_or_env(
|
||||
values, "search_engine_id", "SEARCH_ENGINE_ID"
|
||||
)
|
||||
|
||||
if search_engine_id:
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"The `search_engine_id` parameter is deprecated. Use `data_store_id` instead.", # noqa: E501
|
||||
DeprecationWarning,
|
||||
)
|
||||
values["data_store_id"] = search_engine_id
|
||||
except: # noqa: E722
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
values["data_store_id"] = get_from_dict_or_env(
|
||||
values, "data_store_id", "DATA_STORE_ID"
|
||||
)
|
||||
|
||||
return values
|
||||
|
||||
@property
|
||||
@ -273,12 +265,24 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
||||
client_info=get_client_info(module="vertex-ai-search"),
|
||||
)
|
||||
|
||||
self._serving_config = self._client.serving_config_path(
|
||||
project=self.project_id,
|
||||
location=self.location_id,
|
||||
data_store=self.data_store_id,
|
||||
serving_config=self.serving_config_id,
|
||||
)
|
||||
if self.engine_data_type == 3 and not self.search_engine_id:
|
||||
raise ValueError(
|
||||
"search_engine_id must be specified for blended search apps."
|
||||
)
|
||||
|
||||
if self.search_engine_id:
|
||||
self._serving_config = f"projects/{self.project_id}/locations/{self.location_id}/collections/default_collection/engines/{self.search_engine_id}/servingConfigs/default_config" # noqa: E501
|
||||
elif self.data_store_id:
|
||||
self._serving_config = self._client.serving_config_path(
|
||||
project=self.project_id,
|
||||
location=self.location_id,
|
||||
data_store=self.data_store_id,
|
||||
serving_config=self.serving_config_id,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Either data_store_id or search_engine_id must be specified."
|
||||
)
|
||||
|
||||
def _create_search_request(self, query: str) -> SearchRequest:
|
||||
"""Prepares a SearchRequest object."""
|
||||
@ -310,7 +314,7 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
||||
)
|
||||
elif self.engine_data_type == 1:
|
||||
content_search_spec = None
|
||||
elif self.engine_data_type == 2:
|
||||
elif self.engine_data_type in (2, 3):
|
||||
content_search_spec = SearchRequest.ContentSearchSpec(
|
||||
extractive_content_spec=SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
||||
max_extractive_answer_count=self.max_extractive_answer_count,
|
||||
@ -322,7 +326,7 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only data store type 0 (Unstructured), 1 (Structured),"
|
||||
"or 2 (Website) are supported currently."
|
||||
"2 (Website), or 3 (Blended) are supported currently."
|
||||
+ f" Got {self.engine_data_type}"
|
||||
)
|
||||
|
||||
@ -363,7 +367,7 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
||||
)
|
||||
elif self.engine_data_type == 1:
|
||||
documents = self._convert_structured_search_response(response.results)
|
||||
elif self.engine_data_type == 2:
|
||||
elif self.engine_data_type in (2, 3):
|
||||
chunk_type = (
|
||||
"extractive_answers" if self.get_extractive_answers else "snippets"
|
||||
)
|
||||
@ -373,7 +377,7 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"Only data store type 0 (Unstructured), 1 (Structured),"
|
||||
"or 2 (Website) are supported currently."
|
||||
"2 (Website), or 3 (Blended) are supported currently."
|
||||
+ f" Got {self.engine_data_type}"
|
||||
)
|
||||
|
||||
@ -410,6 +414,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
|
||||
client_info=get_client_info(module="vertex-ai-search"),
|
||||
)
|
||||
|
||||
if not self.data_store_id:
|
||||
raise ValueError("data_store_id is required for MultiTurnSearchRetriever.")
|
||||
|
||||
self._serving_config = self._client.serving_config_path(
|
||||
project=self.project_id,
|
||||
location=self.location_id,
|
||||
@ -417,9 +424,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
|
||||
serving_config=self.serving_config_id,
|
||||
)
|
||||
|
||||
if self.engine_data_type == 1:
|
||||
if self.engine_data_type == 1 or self.engine_data_type == 3:
|
||||
raise NotImplementedError(
|
||||
"Data store type 1 (Structured)"
|
||||
"Data store type 1 (Structured) and 3 (Blended)"
|
||||
"is not currently supported for multi-turn search."
|
||||
+ f" Got {self.engine_data_type}"
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user