mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 06:26:12 +00:00
community[patch]: Make some functions work with Milvus (#10695)
**Description** Make some functions work with Milvus:
1. get_ids: Get primary keys by field in the metadata
2. delete: Delete one or more entities by ids
3. upsert: Update/Insert one or more entities

**Issue** None
**Dependencies** None
**Tag maintainer:** @hwchase17
**Twitter handle:** None

---------

Co-authored-by: HoaNQ9 <hoanq.1811@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -989,3 +989,64 @@ class Milvus(VectorStore):
|
||||
page_content=data.pop(self._text_field),
|
||||
metadata=data.pop(self._metadata_field) if self._metadata_field else data,
|
||||
)
|
||||
|
||||
def get_pks(self, expr: str, **kwargs: Any) -> List[int] | None:
    """Look up the primary keys of the entities matching an expression.

    Args:
        expr: Filter expression passed to the collection query,
            e.g. "id in [1, 2]" or "title LIKE 'Abc%'".
        **kwargs: Accepted but not used by this method.

    Returns:
        List[int] | None: The primary-key value of every row returned by
        the query, or None when ``self.col`` is None (no collection).

    Raises:
        MilvusException: Logged and re-raised when the query fails.
    """
    from pymilvus import MilvusException

    collection = self.col
    if collection is None:
        logger.debug("No existing collection to get pk.")
        return None

    pk_field = self._primary_field
    try:
        # Only the primary-key field is requested from the collection.
        rows = collection.query(expr=expr, output_fields=[pk_field])
    except MilvusException as exc:
        logger.error("Failed to get ids: %s error: %s", self.collection_name, exc)
        raise exc

    found = []
    for row in rows:
        found.append(row.get(pk_field))
    return found
|
||||
|
||||
def upsert(
    self,
    ids: Optional[List[str]] = None,
    documents: List[Document] | None = None,
    **kwargs: Any,
) -> List[str] | None:
    """Update/Insert documents to the vectorstore.

    The upsert is implemented as "delete, then insert": the entities
    identified by ``ids`` are deleted first (best effort), after which
    ``documents`` are added through ``add_documents``.

    Args:
        ids: Primary keys of the entities to replace. Use ``get_pks`` to
            obtain them from an expression. When None or empty, nothing is
            deleted and the documents are only inserted.
        documents (List[Document]): Documents to add to the vectorstore.
        **kwargs: Accepted but not used by this method.

    Returns:
        List[str] | None: The value returned by ``add_documents``
        (annotated as the IDs of the added texts), or None when
        ``documents`` is None or empty.

    Raises:
        MilvusException: Logged and re-raised when adding the documents
            fails. A failure while deleting the old entities is logged
            but does not abort the upsert.
    """
    from pymilvus import MilvusException

    if not documents:
        logger.debug("No documents to upsert.")
        return None

    if ids:
        try:
            self.delete(ids=ids)
        except MilvusException as exc:
            # Deletion stays best-effort so the insert below still runs, but
            # the failure is recorded: the old entities may still exist and
            # the collection can end up with duplicates.
            logger.warning(
                "Failed to delete entities before upsert: %s error: %s",
                self.collection_name,
                exc,
            )

    try:
        return self.add_documents(documents=documents)
    except MilvusException as exc:
        logger.error(
            "Failed to upsert entities: %s error: %s", self.collection_name, exc
        )
        raise
|
||||
|
@@ -1,5 +1,5 @@
|
||||
"""Test Milvus functionality."""
|
||||
from typing import List, Optional
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@@ -25,6 +25,10 @@ def _milvus_from_texts(
|
||||
)
|
||||
|
||||
|
||||
def _get_pks(expr: str, docsearch: Milvus) -> List[Any]:
    """Return what ``docsearch.get_pks`` yields for the expression ``expr``."""
    primary_keys = docsearch.get_pks(expr)
    return primary_keys
|
||||
|
||||
|
||||
def test_milvus() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearch = _milvus_from_texts()
|
||||
@@ -109,6 +113,42 @@ def test_milvus_no_drop() -> None:
|
||||
assert len(output) == 6
|
||||
|
||||
|
||||
def test_milvus_get_pks() -> None:
    """Build a store with numbered metadata and fetch pks by expression."""
    sample_texts = ["foo", "bar", "baz"]
    # One metadata dict per text, carrying its position as "id".
    id_metadatas = [{"id": idx} for idx, _ in enumerate(sample_texts)]
    store = _milvus_from_texts(metadatas=id_metadatas)
    matched = _get_pks("id in [1,2]", store)
    assert len(matched) == 2
|
||||
|
||||
|
||||
def test_milvus_delete_entities() -> None:
    """Build a store, look up pks by expression, then delete those entities."""
    sample_texts = ["foo", "bar", "baz"]
    # One metadata dict per text, carrying its position as "id".
    id_metadatas = [{"id": idx} for idx, _ in enumerate(sample_texts)]
    store = _milvus_from_texts(metadatas=id_metadatas)
    primary_keys = _get_pks("id in [1,2]", store)
    deleted = store.delete(primary_keys)
    assert deleted is True
|
||||
|
||||
|
||||
def test_milvus_upsert_entities() -> None:
    """Test end to end construction and upsert entities"""
    texts = ["foo", "bar", "baz"]
    # One metadata dict per text, carrying its position as "id".
    metadatas = [{"id": i} for i, _ in enumerate(texts)]
    docsearch = _milvus_from_texts(metadatas=metadatas)
    expr = "id in [1,2]"
    pks = _get_pks(expr, docsearch)
    documents = [
        Document(page_content="test_1", metadata={"id": 1}),
        Document(page_content="test_2", metadata={"id": 3}),
    ]
    ids = docsearch.upsert(pks, documents)
    # upsert is annotated as possibly returning None; check that first so a
    # failure is reported as an assertion instead of a TypeError from len().
    assert ids is not None
    assert len(ids) == 2
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# test_milvus()
|
||||
# test_milvus_with_metadata()
|
||||
|
Reference in New Issue
Block a user