From ad101adec8cfbdc7b8c9edebad099cce76966c4a Mon Sep 17 00:00:00 2001 From: am-kinetica <85610855+am-kinetica@users.noreply.github.com> Date: Tue, 11 Jun 2024 19:31:26 +0530 Subject: [PATCH] community[patch]: Kinetica Integrations handled error in querying; quotes in table names; updated gpudb API (#22724) - [ ] **Miscellaneous updates and fixes**: - **Description:** Handle query errors explicitly; quote schema and table names in the generated SQL; update the gpudb API version - **Issue:** Previously, a query that failed or returned no records raised an error whose message was difficult to understand - **Dependencies:** Updated GPUDB API version to `7.2.0.9` @baskaryan @hwchase17 --- .../document_loaders/kinetica.ipynb | 6 +-- .../integrations/retrievers/kinetica.ipynb | 2 +- .../integrations/vectorstores/kinetica.ipynb | 2 +- .../document_loaders/kinetica_loader.py | 2 +- .../vectorstores/kinetica.py | 44 ++++++++++++++----- .../vectorstores/test_kinetica.py | 3 +- 6 files changed, 42 insertions(+), 17 deletions(-) diff --git a/docs/docs/integrations/document_loaders/kinetica.ipynb b/docs/docs/integrations/document_loaders/kinetica.ipynb index 0176557308f..bf1e6c7d7f8 100644 --- a/docs/docs/integrations/document_loaders/kinetica.ipynb +++ b/docs/docs/integrations/document_loaders/kinetica.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install gpudb==7.2.0.1" + "%pip install gpudb==7.2.0.9" ] }, { @@ -97,14 +97,14 @@ "# data and the `SCHEMA.TABLE` combination must exist in Kinetica.\n", "\n", "QUERY = \"select text, survey_id as source from SCHEMA.TABLE limit 10\"\n", - "snowflake_loader = KineticaLoader(\n", + "kl = KineticaLoader(\n", " query=QUERY,\n", " host=HOST,\n", " username=USERNAME,\n", " password=PASSWORD,\n", " metadata_columns=[\"source\"],\n", ")\n", - "kinetica_documents = snowflake_loader.load()\n", + "kinetica_documents = kl.load()\n", "print(kinetica_documents)" ] } diff --git a/docs/docs/integrations/retrievers/kinetica.ipynb 
b/docs/docs/integrations/retrievers/kinetica.ipynb index 79aaa63ad90..afde339dd9c 100644 --- a/docs/docs/integrations/retrievers/kinetica.ipynb +++ b/docs/docs/integrations/retrievers/kinetica.ipynb @@ -22,7 +22,7 @@ "outputs": [], "source": [ "# Please ensure that this connector is installed in your working environment.\n", - "%pip install gpudb==7.2.0.1" + "%pip install gpudb==7.2.0.9" ] }, { diff --git a/docs/docs/integrations/vectorstores/kinetica.ipynb b/docs/docs/integrations/vectorstores/kinetica.ipynb index 5df1dfd96e8..491de90aaac 100644 --- a/docs/docs/integrations/vectorstores/kinetica.ipynb +++ b/docs/docs/integrations/vectorstores/kinetica.ipynb @@ -61,7 +61,7 @@ "source": [ "# Pip install necessary package\n", "%pip install --upgrade --quiet langchain-openai langchain-community\n", - "%pip install gpudb==7.2.0.1\n", + "%pip install gpudb==7.2.0.9\n", "%pip install --upgrade --quiet tiktoken" ] }, diff --git a/libs/community/langchain_community/document_loaders/kinetica_loader.py b/libs/community/langchain_community/document_loaders/kinetica_loader.py index a6a15db4de7..965cd3feea9 100644 --- a/libs/community/langchain_community/document_loaders/kinetica_loader.py +++ b/libs/community/langchain_community/document_loaders/kinetica_loader.py @@ -49,7 +49,7 @@ class KineticaLoader(BaseLoader): except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.1`." + "Please install it with `pip install gpudb==7.2.0.9`." ) try: diff --git a/libs/community/langchain_community/vectorstores/kinetica.py b/libs/community/langchain_community/vectorstores/kinetica.py index c2525688344..bb67e77b3f9 100644 --- a/libs/community/langchain_community/vectorstores/kinetica.py +++ b/libs/community/langchain_community/vectorstores/kinetica.py @@ -168,7 +168,7 @@ class Kinetica(VectorStore): except ImportError: raise ImportError( "Could not import Kinetica python API. 
" - "Please install it with `pip install gpudb==7.2.0.1`." + "Please install it with `pip install gpudb==7.2.0.9`." ) self.dimensions = dimensions @@ -197,7 +197,7 @@ class Kinetica(VectorStore): except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.1`." + "Please install it with `pip install gpudb==7.2.0.9`." ) options = GPUdb.Options() @@ -224,6 +224,8 @@ class Kinetica(VectorStore): distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, pre_delete_collection: bool = False, logger: Optional[logging.Logger] = None, + *, + schema_name: str = _LANGCHAIN_DEFAULT_SCHEMA_NAME, **kwargs: Any, ) -> Kinetica: """Class method to assist in constructing the `Kinetica` store instance @@ -239,8 +241,10 @@ class Kinetica(VectorStore): with each text. Defaults to None. ids (Optional[List[str]], optional): List of unique IDs (UUID by default) associated with each text. Defaults to None. - collection_name (str, optional): Kinetica schema name. + collection_name (str, optional): Kinetica table name. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. + schema_name (str, optional): Kinetica schema name. + Defaults to _LANGCHAIN_DEFAULT_SCHEMA_NAME. distance_strategy (DistanceStrategy, optional): Not used for now. Defaults to DEFAULT_DISTANCE_STRATEGY. pre_delete_collection (bool, optional): Whether to delete the Kinetica @@ -260,8 +264,8 @@ class Kinetica(VectorStore): store = cls( config=config, collection_name=collection_name, + schema_name=schema_name, embedding_function=embedding, - # dimensions=dimensions, distance_strategy=distance_strategy, pre_delete_collection=pre_delete_collection, logger=logger, @@ -284,7 +288,7 @@ class Kinetica(VectorStore): except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.1`." + "Please install it with `pip install gpudb==7.2.0.9`." 
) return GPUdbTable( _type=self.table_schema, @@ -422,12 +426,17 @@ class Kinetica(VectorStore): k: int = 4, filter: Optional[dict] = None, ) -> List[Tuple[Document, float]]: + from gpudb import GPUdbException + resp: Dict = self.__query_collection(embedding, k, filter) + if resp and resp["status_info"]["status"] == "OK" and "records" in resp: + records: OrderedDict = resp["records"] + results = list(zip(*list(records.values()))) - records: OrderedDict = resp["records"] - results = list(zip(*list(records.values()))) - - return self._results_to_docs_and_scores(results) + return self._results_to_docs_and_scores(results) + else: + self.logger.error(resp["status_info"]["message"]) + raise GPUdbException(resp["status_info"]["message"]) def similarity_search_by_vector( self, @@ -545,7 +554,7 @@ class Kinetica(VectorStore): query_string = f""" SELECT text, metadata, {dist_strategy}(embedding, '{embedding_str}') as distance, embedding - FROM {self.table_name} + FROM "{self.schema_name}"."{self.collection_name}" {where_clause} ORDER BY distance asc NULLS LAST LIMIT {k} @@ -760,6 +769,8 @@ class Kinetica(VectorStore): distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, ids: Optional[List[str]] = None, pre_delete_collection: bool = False, + *, + schema_name: str = _LANGCHAIN_DEFAULT_SCHEMA_NAME, **kwargs: Any, ) -> Kinetica: """Adds the texts passed in to the vector store and returns it @@ -773,6 +784,8 @@ class Kinetica(VectorStore): config (KineticaSettings): a `KineticaSettings` instance collection_name (str, optional): Kinetica schema name. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. + schema_name (str, optional): Kinetica schema name. + Defaults to _LANGCHAIN_DEFAULT_SCHEMA_NAME. distance_strategy (DistanceStrategy, optional): Distance strategy e.g., l2, cosine etc.. Defaults to DEFAULT_DISTANCE_STRATEGY. 
ids (Optional[List[str]], optional): A list of UUIDs for each @@ -804,6 +817,7 @@ class Kinetica(VectorStore): metadatas=metadatas, ids=ids, collection_name=collection_name, + schema_name=schema_name, distance_strategy=distance_strategy, pre_delete_collection=pre_delete_collection, **kwargs, @@ -823,6 +837,8 @@ class Kinetica(VectorStore): distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, ids: Optional[List[str]] = None, pre_delete_collection: bool = False, + *, + schema_name: str = _LANGCHAIN_DEFAULT_SCHEMA_NAME, **kwargs: Any, ) -> Kinetica: """Adds the embeddings passed in to the vector store and returns it @@ -839,6 +855,8 @@ class Kinetica(VectorStore): default will be used. Defaults to Dimension.OPENAI. collection_name (str, optional): Kinetica schema name. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. + schema_name (str, optional): Kinetica schema name. + Defaults to _LANGCHAIN_DEFAULT_SCHEMA_NAME. distance_strategy (DistanceStrategy, optional): Distance strategy e.g., l2, cosine etc.. Defaults to DEFAULT_DISTANCE_STRATEGY. ids (Optional[List[str]], optional): A list of UUIDs for each text/document. @@ -863,6 +881,7 @@ class Kinetica(VectorStore): metadatas=metadatas, ids=ids, collection_name=collection_name, + schema_name=schema_name, distance_strategy=distance_strategy, pre_delete_collection=pre_delete_collection, **kwargs, @@ -879,6 +898,8 @@ class Kinetica(VectorStore): distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, ids: Optional[List[str]] = None, pre_delete_collection: bool = False, + *, + schema_name: str = _LANGCHAIN_DEFAULT_SCHEMA_NAME, **kwargs: Any, ) -> Kinetica: """Adds the list of `Document` passed in to the vector store and returns it @@ -892,6 +913,8 @@ class Kinetica(VectorStore): the texts/documents. Defaults to None. collection_name (str, optional): Kinetica schema name. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. + schema_name (str, optional): Kinetica schema name. 
+ Defaults to _LANGCHAIN_DEFAULT_SCHEMA_NAME. distance_strategy (DistanceStrategy, optional): Distance strategy e.g., l2, cosine etc.. Defaults to DEFAULT_DISTANCE_STRATEGY. ids (Optional[List[str]], optional): A list of UUIDs for each text/document. @@ -912,6 +935,7 @@ class Kinetica(VectorStore): metadatas=metadatas, config=config, collection_name=collection_name, + schema_name=schema_name, distance_strategy=distance_strategy, ids=ids, pre_delete_collection=pre_delete_collection, diff --git a/libs/community/tests/integration_tests/vectorstores/test_kinetica.py b/libs/community/tests/integration_tests/vectorstores/test_kinetica.py index bde61c47f8f..8b535a4f9f7 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_kinetica.py +++ b/libs/community/tests/integration_tests/vectorstores/test_kinetica.py @@ -46,7 +46,8 @@ def test_kinetica(create_config: KineticaSettings) -> None: texts=texts, metadatas=metadatas, embedding=FakeEmbeddingsWithAdaDimension(), - collection_name="test_kinetica", + collection_name="1test_kinetica", + schema_name="1test", pre_delete_collection=True, ) output = docsearch.similarity_search("foo", k=1)