mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 19:12:42 +00:00
Minor improvements to rockset vectorstore (#8416)
This PR makes minor improvements to our Python notebook and adds support for `Rockset` workspaces in our vectorstore client. @rlancemartin, @eyurtsev --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -23,7 +23,6 @@ class Rockset(VectorStore):
|
||||
See: https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details
|
||||
|
||||
Everything below assumes `commons` Rockset workspace.
|
||||
TODO: Add support for workspace args.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
@@ -50,6 +49,7 @@ class Rockset(VectorStore):
|
||||
collection_name: str,
|
||||
text_key: str,
|
||||
embedding_key: str,
|
||||
workspace: str = "commons",
|
||||
):
|
||||
"""Initialize with Rockset client.
|
||||
Args:
|
||||
@@ -82,6 +82,7 @@ class Rockset(VectorStore):
|
||||
self._embeddings = embeddings
|
||||
self._text_key = text_key
|
||||
self._embedding_key = embedding_key
|
||||
self._workspace = workspace
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Embeddings:
|
||||
@@ -303,7 +304,7 @@ class Rockset(VectorStore):
|
||||
where_str = f"WHERE {where_str}\n" if where_str else ""
|
||||
return f"""\
|
||||
SELECT * EXCEPT({self._embedding_key}), {distance_str}
|
||||
FROM {self._collection_name}
|
||||
FROM {self._workspace}.{self._collection_name}
|
||||
{where_str}\
|
||||
ORDER BY dist {distance_func.order_by()}
|
||||
LIMIT {str(k)}
|
||||
@@ -311,7 +312,7 @@ LIMIT {str(k)}
|
||||
|
||||
def _write_documents_to_rockset(self, batch: List[dict]) -> List[str]:
|
||||
add_doc_res = self._client.Documents.add_documents(
|
||||
collection=self._collection_name, data=batch
|
||||
collection=self._collection_name, data=batch, workspace=self._workspace
|
||||
)
|
||||
return [doc_status._id for doc_status in add_doc_res.data]
|
||||
|
||||
@@ -328,4 +329,5 @@ LIMIT {str(k)}
|
||||
self._client.Documents.delete_documents(
|
||||
collection=self._collection_name,
|
||||
data=[DeleteDocumentsRequestData(id=i) for i in ids],
|
||||
workspace=self._workspace,
|
||||
)
|
||||
|
@@ -34,12 +34,12 @@ def test_sql_query() -> None:
|
||||
|
||||
client = rockset.RocksetClient(host, api_key)
|
||||
|
||||
col_1 = "Rockset is a real-time analytics database which enables queries on massive, semi-structured data without operational burden. Rockset is serverless and fully managed. It offloads the work of managing configuration, cluster provisioning, denormalization, and shard / index management. Rockset is also SOC 2 Type II compliant and offers encryption at rest and in flight, securing and protecting any sensitive data. Most teams can ingest data into Rockset and start executing queries in less than 15 minutes." # noqa: E501
|
||||
col_1 = "Rockset is a real-time analytics database"
|
||||
col_2 = 2
|
||||
col_3 = "e903e069-b0b5-4b80-95e2-86471b41f55f"
|
||||
id = 7320132
|
||||
|
||||
"""Run a simple SQL query query"""
|
||||
"""Run a simple SQL query"""
|
||||
loader = RocksetLoader(
|
||||
client,
|
||||
rockset.models.QueryRequestSql(
|
||||
|
@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
|
||||
#
|
||||
# See https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details.
|
||||
|
||||
workspace = "langchain_tests"
|
||||
collection_name = "langchain_demo"
|
||||
text_key = "description"
|
||||
embedding_key = "description_embedding"
|
||||
@@ -71,10 +72,9 @@ class TestRockset:
|
||||
"Deleting all existing documents from the Rockset collection %s",
|
||||
collection_name,
|
||||
)
|
||||
query = f"select _id from {workspace}.{collection_name}"
|
||||
|
||||
query_response = client.Queries.query(
|
||||
sql={"query": "select _id from {}".format(collection_name)}
|
||||
)
|
||||
query_response = client.Queries.query(sql={"query": query})
|
||||
ids = [
|
||||
str(r["_id"])
|
||||
for r in getattr(
|
||||
@@ -85,12 +85,13 @@ class TestRockset:
|
||||
client.Documents.delete_documents(
|
||||
collection=collection_name,
|
||||
data=[rockset.models.DeleteDocumentsRequestData(id=i) for i in ids],
|
||||
workspace=workspace,
|
||||
)
|
||||
|
||||
embeddings = ConsistentFakeEmbeddings()
|
||||
embeddings.embed_documents(fake_texts)
|
||||
cls.rockset_vectorstore = Rockset(
|
||||
client, embeddings, collection_name, text_key, embedding_key
|
||||
client, embeddings, collection_name, text_key, embedding_key, workspace
|
||||
)
|
||||
|
||||
def test_rockset_insert_and_search(self) -> None:
|
||||
@@ -127,9 +128,9 @@ class TestRockset:
|
||||
)
|
||||
vector_str = ",".join(map(str, vector))
|
||||
expected = f"""\
|
||||
SELECT * EXCEPT(description_embedding), \
|
||||
COSINE_SIM(description_embedding, [{vector_str}]) as dist
|
||||
FROM langchain_demo
|
||||
SELECT * EXCEPT({embedding_key}), \
|
||||
COSINE_SIM({embedding_key}, [{vector_str}]) as dist
|
||||
FROM {workspace}.{collection_name}
|
||||
ORDER BY dist DESC
|
||||
LIMIT 4
|
||||
"""
|
||||
@@ -145,9 +146,9 @@ LIMIT 4
|
||||
)
|
||||
vector_str = ",".join(map(str, vector))
|
||||
expected = f"""\
|
||||
SELECT * EXCEPT(description_embedding), \
|
||||
COSINE_SIM(description_embedding, [{vector_str}]) as dist
|
||||
FROM langchain_demo
|
||||
SELECT * EXCEPT({embedding_key}), \
|
||||
COSINE_SIM({embedding_key}, [{vector_str}]) as dist
|
||||
FROM {workspace}.{collection_name}
|
||||
WHERE age >= 10
|
||||
ORDER BY dist DESC
|
||||
LIMIT 4
|
||||
|
Reference in New Issue
Block a user