feat: Add Client Info to available Google Cloud Clients (#12168)

- This is used internally to gather aggregate usage metrics for the
LangChain integrations

- Note: This cannot be added to some of the Vertex AI integrations at
this time because the SDK doesn't allow overriding the
[`ClientInfo`](https://googleapis.dev/python/google-api-core/latest/client_info.html#module-google.api_core.client_info)

- Added to (the pattern applied in each is sketched after this list):
  - BigQuery
  - Google Cloud Storage
  - Document AI
  - Vertex AI Model Garden
  - Document AI Warehouse
  - Vertex AI Search
  - Vertex AI Matching Engine (Cloud Storage Client)
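A minimal sketch of the pattern these integrations now follow (shown here for the BigQuery loader; the project ID is a placeholder and credentials are left to Application Default Credentials):

```python
from google.cloud import bigquery

from langchain.utilities.vertexai import get_client_info

# get_client_info() builds a google.api_core ClientInfo whose user agent embeds
# the installed LangChain version plus an optional module tag, so requests made
# by this integration are attributed to LangChain.
client = bigquery.Client(
    credentials=None,  # None -> fall back to Application Default Credentials
    project="my-project",  # placeholder project ID
    client_info=get_client_info(module="bigquery"),
)
```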
 
@baskaryan, @eyurtsev, @hwchase17

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Holt Skinner 2023-10-24 10:49:11 -05:00 committed by GitHub
parent 69f5f82804
commit 69d9eae5cd
9 changed files with 79 additions and 19 deletions

View File

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, List, Optional
 
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
+from langchain.utilities.vertexai import get_client_info
 
 if TYPE_CHECKING:
     from google.auth.credentials import Credentials
@@ -57,7 +58,11 @@ class BigQueryLoader(BaseLoader):
                 "Please install it with `pip install google-cloud-bigquery`."
             ) from ex
 
-        bq_client = bigquery.Client(credentials=self.credentials, project=self.project)
+        bq_client = bigquery.Client(
+            credentials=self.credentials,
+            project=self.project,
+            client_info=get_client_info(module="bigquery"),
+        )
         if not bq_client.project:
             error_desc = (
                 "GCP project for Big Query is not set! Either provide a "

View File

@@ -3,6 +3,7 @@ from typing import Callable, List, Optional
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 from langchain.document_loaders.gcs_file import GCSFileLoader
+from langchain.utilities.vertexai import get_client_info
 
 
 class GCSDirectoryLoader(BaseLoader):
@@ -18,7 +19,7 @@ class GCSDirectoryLoader(BaseLoader):
         """Initialize with bucket and key name.
 
         Args:
-            project_name: The name of the project for the GCS bucket.
+            project_name: The ID of the project for the GCS bucket.
             bucket: The name of the GCS bucket.
             prefix: The prefix of the GCS bucket.
             loader_func: A loader function that instantiates a loader based on a
@@ -39,7 +40,10 @@ class GCSDirectoryLoader(BaseLoader):
                 "Could not import google-cloud-storage python package. "
                 "Please install it with `pip install google-cloud-storage`."
             )
-        client = storage.Client(project=self.project_name)
+        client = storage.Client(
+            project=self.project_name,
+            client_info=get_client_info(module="google-cloud-storage"),
+        )
         docs = []
         for blob in client.list_blobs(self.bucket, prefix=self.prefix):
             # we shall just skip directories since GCSFileLoader creates

View File

@@ -5,6 +5,7 @@ from typing import Callable, List, Optional
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
+from langchain.utilities.vertexai import get_client_info
 
 
 class GCSFileLoader(BaseLoader):
@@ -57,7 +58,9 @@ class GCSFileLoader(BaseLoader):
             )
 
         # Initialise a client
-        storage_client = storage.Client(self.project_name)
+        storage_client = storage.Client(
+            self.project_name, client_info=get_client_info("google-cloud-storage")
+        )
         # Create a bucket object for our bucket
         bucket = storage_client.get_bucket(self.bucket)
         # Create a blob object from the filepath

View File

@@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseBlobParser
 from langchain.document_loaders.blob_loaders import Blob
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils.iter import batch_iterate
 
 if TYPE_CHECKING:
@@ -89,7 +90,10 @@ class DocAIParser(BaseBlobParser):
             options = ClientOptions(
                 api_endpoint=f"{location}-documentai.googleapis.com"
             )
-            self._client = DocumentProcessorServiceClient(client_options=options)
+            self._client = DocumentProcessorServiceClient(
+                client_options=options,
+                client_info=get_client_info(module="document-ai"),
+            )
 
     def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Parses a blob lazily.

View File

@@ -25,6 +25,7 @@ from langchain.schema import (
 )
 from langchain.schema.output import GenerationChunk
 from langchain.utilities.vertexai import (
+    get_client_info,
     init_vertexai,
     raise_vertex_import_error,
 )
@@ -370,9 +371,12 @@ class VertexAIModelGarden(_VertexAIBase, BaseLLM):
         client_options = ClientOptions(
             api_endpoint=f"{values['location']}-aiplatform.googleapis.com"
         )
-        values["client"] = PredictionServiceClient(client_options=client_options)
+        client_info = get_client_info(module="vertex-ai-model-garden")
+        values["client"] = PredictionServiceClient(
+            client_options=client_options, client_info=client_info
+        )
         values["async_client"] = PredictionServiceAsyncClient(
-            client_options=client_options
+            client_options=client_options, client_info=client_info
         )
         return values

View File

@@ -5,6 +5,7 @@ from langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.docstore.document import Document
 from langchain.pydantic_v1 import root_validator
 from langchain.schema import BaseRetriever
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils import get_from_dict_or_env
 
 if TYPE_CHECKING:
@@ -29,23 +30,21 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
     """
 
     location: str = "us"
-    "GCP location where DocAI Warehouse is placed."
+    """Google Cloud location where Document AI Warehouse is placed."""
    project_number: str
-    "GCP project number, should contain digits only."
+    """Google Cloud project number, should contain digits only."""
     schema_id: Optional[str] = None
-    "DocAI Warehouse schema to queary against. If nothing is provided, all documents "
-    "in the project will be searched."
+    """Document AI Warehouse schema to query against.
+    If nothing is provided, all documents in the project will be searched."""
     qa_size_limit: int = 5
-    "The limit on the number of documents returned."
+    """The limit on the number of documents returned."""
     client: "DocumentServiceClient" = None  #: :meta private:
 
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validates the environment."""
         try:  # noqa: F401
-            from google.cloud.contentwarehouse_v1 import (
-                DocumentServiceClient,
-            )
+            from google.cloud.contentwarehouse_v1 import DocumentServiceClient
         except ImportError as exc:
             raise ImportError(
                 "google.cloud.contentwarehouse is not installed."
@@ -55,7 +54,9 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
         values["project_number"] = get_from_dict_or_env(
             values, "project_number", "PROJECT_NUMBER"
         )
-        values["client"] = DocumentServiceClient()
+        values["client"] = DocumentServiceClient(
+            client_info=get_client_info(module="document-ai-warehouse")
+        )
         return values
 
     def _prepare_request_metadata(self, user_ldap: str) -> "RequestMetadata":

View File

@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence
 from langchain.callbacks.manager import CallbackManagerForRetrieverRun
 from langchain.pydantic_v1 import BaseModel, Extra, Field, root_validator
 from langchain.schema import BaseRetriever, Document
+from langchain.utilities.vertexai import get_client_info
 from langchain.utils import get_from_dict_or_env
 
 if TYPE_CHECKING:
@@ -260,7 +261,9 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
         # For more information, refer to:
         # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
         self._client = SearchServiceClient(
-            credentials=self.credentials, client_options=self.client_options
+            credentials=self.credentials,
+            client_options=self.client_options,
+            client_info=get_client_info(module="vertex-ai-search"),
         )
 
         self._serving_config = self._client.serving_config_path(
@@ -387,7 +390,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
             )
 
         self._client = ConversationalSearchServiceClient(
-            credentials=self.credentials, client_options=self.client_options
+            credentials=self.credentials,
+            client_options=self.client_options,
+            client_info=get_client_info(module="vertex-ai-search"),
         )
 
         self._serving_config = self._client.serving_config_path(

View File

@@ -1,7 +1,9 @@
 """Utilities to init Vertex AI."""
+from importlib import metadata
 from typing import TYPE_CHECKING, Optional
 
 if TYPE_CHECKING:
+    from google.api_core.gapic_v1.client_info import ClientInfo
     from google.auth.credentials import Credentials
 
@@ -46,3 +48,30 @@ def init_vertexai(
         location=location,
         credentials=credentials,
     )
+
+
+def get_client_info(module: Optional[str] = None) -> "ClientInfo":
+    r"""Returns a custom user agent header.
+
+    Args:
+        module (Optional[str]):
+            Optional. The module for a custom user agent header.
+    Returns:
+        google.api_core.gapic_v1.client_info.ClientInfo
+    """
+    try:
+        from google.api_core.gapic_v1.client_info import ClientInfo
+    except ImportError as exc:
+        raise ImportError(
+            "Could not import ClientInfo. Please, install it with "
+            "pip install google-api-core"
+        ) from exc
+    langchain_version = metadata.version("langchain")
+    client_library_version = (
+        f"{langchain_version}-{module}" if module else langchain_version
+    )
+    return ClientInfo(
+        client_library_version=client_library_version,
+        user_agent=f"langchain/{client_library_version}",
+    )

View File

@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
 from langchain.schema.document import Document
 from langchain.schema.embeddings import Embeddings
 from langchain.schema.vectorstore import VectorStore
+from langchain.utilities.vertexai import get_client_info
 
 if TYPE_CHECKING:
     from google.cloud import storage
@@ -419,7 +420,11 @@ class MatchingEngine(VectorStore):
         from google.cloud import storage
 
-        return storage.Client(credentials=credentials, project=project_id)
+        return storage.Client(
+            credentials=credentials,
+            project=project_id,
+            client_info=get_client_info(module="vertex-ai-matching-engine"),
+        )
 
     @classmethod
     def _init_aiplatform(