feat: Add Client Info to available Google Cloud Clients (#12168)
- This is used internally to gather aggregate usage metrics for the LangChain integrations.
- Note: This cannot be added to some of the Vertex AI integrations at this time because the SDK doesn't allow overriding the [`ClientInfo`](https://googleapis.dev/python/google-api-core/latest/client_info.html#module-google.api_core.client_info).
- Added to:
  - BigQuery
  - Google Cloud Storage
  - Document AI
  - Vertex AI Model Garden
  - Document AI Warehouse
  - Vertex AI Search
  - Vertex AI Matching Engine (Cloud Storage Client)

@baskaryan, @eyurtsev, @hwchase17

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent 69f5f82804
commit 69d9eae5cd
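For context on what the new header looks like: `get_client_info` (added to `langchain.utilities.vertexai` in this change, see the last-but-one diff section below) builds a `ClientInfo` whose user agent combines the installed LangChain version with an optional module tag. A minimal sketch, assuming `langchain` and `google-api-core` are installed; the version in the comments is illustrative:

```python
from langchain.utilities.vertexai import get_client_info

# With langchain 0.0.320 installed (illustrative version), the helper returns a
# ClientInfo carrying client_library_version="0.0.320-bigquery" and
# user_agent="langchain/0.0.320-bigquery"; with no module, just "langchain/0.0.320".
info = get_client_info(module="bigquery")
print(info.user_agent)
print(info.client_library_version)
```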
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, List, Optional
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
+from langchain.utilities.vertexai import get_client_info

 if TYPE_CHECKING:
     from google.auth.credentials import Credentials

@@ -57,7 +58,11 @@ class BigQueryLoader(BaseLoader):
                 "Please install it with `pip install google-cloud-bigquery`."
             ) from ex

-        bq_client = bigquery.Client(credentials=self.credentials, project=self.project)
+        bq_client = bigquery.Client(
+            credentials=self.credentials,
+            project=self.project,
+            client_info=get_client_info(module="bigquery"),
+        )
         if not bq_client.project:
             error_desc = (
                 "GCP project for Big Query is not set! Either provide a "
@@ -3,6 +3,7 @@ from typing import Callable, List, Optional
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 from langchain.document_loaders.gcs_file import GCSFileLoader
+from langchain.utilities.vertexai import get_client_info


 class GCSDirectoryLoader(BaseLoader):

@@ -18,7 +19,7 @@ class GCSDirectoryLoader(BaseLoader):
         """Initialize with bucket and key name.

         Args:
-            project_name: The name of the project for the GCS bucket.
+            project_name: The ID of the project for the GCS bucket.
             bucket: The name of the GCS bucket.
             prefix: The prefix of the GCS bucket.
             loader_func: A loader function that instantiates a loader based on a

@@ -39,7 +40,10 @@ class GCSDirectoryLoader(BaseLoader):
                 "Could not import google-cloud-storage python package. "
                 "Please install it with `pip install google-cloud-storage`."
             )
-        client = storage.Client(project=self.project_name)
+        client = storage.Client(
+            project=self.project_name,
+            client_info=get_client_info(module="google-cloud-storage"),
+        )
         docs = []
         for blob in client.list_blobs(self.bucket, prefix=self.prefix):
             # we shall just skip directories since GCSFileLoader creates
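As a quick sanity check that the change is transparent to callers, a minimal usage sketch of `GCSDirectoryLoader` (project, bucket, and prefix are placeholders; `google-cloud-storage` and valid credentials are assumed) — the loader now attaches the LangChain user agent to its Storage client automatically:

```python
from langchain.document_loaders import GCSDirectoryLoader

# Placeholder project/bucket/prefix; under the hood the loader now builds
# storage.Client(project=..., client_info=get_client_info(module="google-cloud-storage")).
loader = GCSDirectoryLoader(
    project_name="my-project", bucket="my-bucket", prefix="reports/"
)
docs = loader.load()
print(len(docs))
```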
@@ -5,6 +5,7 @@ from typing import Callable, List, Optional
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
+from langchain.utilities.vertexai import get_client_info


 class GCSFileLoader(BaseLoader):

@@ -57,7 +58,9 @@ class GCSFileLoader(BaseLoader):
             )

         # Initialise a client
-        storage_client = storage.Client(self.project_name)
+        storage_client = storage.Client(
+            self.project_name, client_info=get_client_info("google-cloud-storage")
+        )
         # Create a bucket object for our bucket
         bucket = storage_client.get_bucket(self.bucket)
         # Create a blob object from the filepath
@@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseBlobParser
 from langchain.document_loaders.blob_loaders import Blob
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils.iter import batch_iterate

 if TYPE_CHECKING:

@@ -89,7 +90,10 @@ class DocAIParser(BaseBlobParser):
             options = ClientOptions(
                 api_endpoint=f"{location}-documentai.googleapis.com"
             )
-            self._client = DocumentProcessorServiceClient(client_options=options)
+            self._client = DocumentProcessorServiceClient(
+                client_options=options,
+                client_info=get_client_info(module="document-ai"),
+            )

     def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Parses a blob lazily.
@@ -25,6 +25,7 @@ from langchain.schema import (
 )
 from langchain.schema.output import GenerationChunk
 from langchain.utilities.vertexai import (
+    get_client_info,
     init_vertexai,
     raise_vertex_import_error,
 )

@@ -370,9 +371,12 @@ class VertexAIModelGarden(_VertexAIBase, BaseLLM):
         client_options = ClientOptions(
             api_endpoint=f"{values['location']}-aiplatform.googleapis.com"
         )
-        values["client"] = PredictionServiceClient(client_options=client_options)
+        client_info = get_client_info(module="vertex-ai-model-garden")
+        values["client"] = PredictionServiceClient(
+            client_options=client_options, client_info=client_info
+        )
         values["async_client"] = PredictionServiceAsyncClient(
-            client_options=client_options
+            client_options=client_options, client_info=client_info
         )
         return values
@@ -5,6 +5,7 @@ from langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.docstore.document import Document
 from langchain.pydantic_v1 import root_validator
 from langchain.schema import BaseRetriever
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils import get_from_dict_or_env

 if TYPE_CHECKING:

@@ -29,23 +30,21 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
     """

     location: str = "us"
-    "GCP location where DocAI Warehouse is placed."
+    """Google Cloud location where Document AI Warehouse is placed."""
     project_number: str
-    "GCP project number, should contain digits only."
+    """Google Cloud project number, should contain digits only."""
     schema_id: Optional[str] = None
-    "DocAI Warehouse schema to queary against. If nothing is provided, all documents "
-    "in the project will be searched."
+    """Document AI Warehouse schema to query against.
+    If nothing is provided, all documents in the project will be searched."""
     qa_size_limit: int = 5
-    "The limit on the number of documents returned."
+    """The limit on the number of documents returned."""
     client: "DocumentServiceClient" = None  #: :meta private:

     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validates the environment."""
         try:  # noqa: F401
-            from google.cloud.contentwarehouse_v1 import (
-                DocumentServiceClient,
-            )
+            from google.cloud.contentwarehouse_v1 import DocumentServiceClient
         except ImportError as exc:
             raise ImportError(
                 "google.cloud.contentwarehouse is not installed."

@@ -55,7 +54,9 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
         values["project_number"] = get_from_dict_or_env(
             values, "project_number", "PROJECT_NUMBER"
         )
-        values["client"] = DocumentServiceClient()
+        values["client"] = DocumentServiceClient(
+            client_info=get_client_info(module="document-ai-warehouse")
+        )
         return values

     def _prepare_request_metadata(self, user_ldap: str) -> "RequestMetadata":
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence
 from langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.pydantic_v1 import BaseModel, Extra, Field, root_validator
 from langchain.schema import BaseRetriever, Document
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils import get_from_dict_or_env

 if TYPE_CHECKING:

@@ -260,7 +261,9 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
         # For more information, refer to:
         # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
         self._client = SearchServiceClient(
-            credentials=self.credentials, client_options=self.client_options
+            credentials=self.credentials,
+            client_options=self.client_options,
+            client_info=get_client_info(module="vertex-ai-search"),
         )

         self._serving_config = self._client.serving_config_path(

@@ -387,7 +390,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
         )

         self._client = ConversationalSearchServiceClient(
-            credentials=self.credentials, client_options=self.client_options
+            credentials=self.credentials,
+            client_options=self.client_options,
+            client_info=get_client_info(module="vertex-ai-search"),
         )

         self._serving_config = self._client.serving_config_path(
@@ -1,7 +1,9 @@
 """Utilities to init Vertex AI."""
+from importlib import metadata
 from typing import TYPE_CHECKING, Optional

 if TYPE_CHECKING:
+    from google.api_core.gapic_v1.client_info import ClientInfo
     from google.auth.credentials import Credentials

@@ -46,3 +48,30 @@ def init_vertexai(
         location=location,
         credentials=credentials,
     )
+
+
+def get_client_info(module: Optional[str] = None) -> "ClientInfo":
+    r"""Returns a custom user agent header.
+
+    Args:
+        module (Optional[str]):
+            Optional. The module for a custom user agent header.
+    Returns:
+        google.api_core.gapic_v1.client_info.ClientInfo
+    """
+    try:
+        from google.api_core.gapic_v1.client_info import ClientInfo
+    except ImportError as exc:
+        raise ImportError(
+            "Could not import ClientInfo. Please, install it with "
+            "pip install google-api-core"
+        ) from exc
+
+    langchain_version = metadata.version("langchain")
+    client_library_version = (
+        f"{langchain_version}-{module}" if module else langchain_version
+    )
+    return ClientInfo(
+        client_library_version=client_library_version,
+        user_agent=f"langchain/{client_library_version}",
+    )
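A usage sketch (not part of the diff): the same helper can be handed to any Google Cloud client that accepts a `client_info` argument. The project and bucket names below are placeholders, the `module` tag is made up for illustration, and `google-cloud-storage` plus valid credentials are assumed:

```python
from google.cloud import storage

from langchain.utilities.vertexai import get_client_info

# Every request made by this client reports a user agent of the form
# "langchain/<langchain-version>-my-integration".
client = storage.Client(
    project="my-project",
    client_info=get_client_info(module="my-integration"),
)
for blob in client.list_blobs("my-bucket", max_results=5):
    print(blob.name)
```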
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
 from langchain.schema.document import Document
 from langchain.schema.embeddings import Embeddings
 from langchain.schema.vectorstore import VectorStore
+from langchain.utilities.vertexai import get_client_info

 if TYPE_CHECKING:
     from google.cloud import storage

@@ -419,7 +420,11 @@ class MatchingEngine(VectorStore):

         from google.cloud import storage

-        return storage.Client(credentials=credentials, project=project_id)
+        return storage.Client(
+            credentials=credentials,
+            project=project_id,
+            client_info=get_client_info(module="vertex-ai-matching-engine"),
+        )

     @classmethod
     def _init_aiplatform(