From 16bd0697dce35f7a1672231959307c1efb05876b Mon Sep 17 00:00:00 2001 From: Stuart Marsh Date: Mon, 5 Aug 2024 17:01:55 +0100 Subject: [PATCH] milvus: fixed bug when using partition key and dynamic fields together (#25028) **Description:** This PR fixes a bug where if `enable_dynamic_field` and `partition_key_field` are enabled at the same time, a pymilvus error occurs. Milvus requires the partition key field to be a fully schema-defined field, and not a dynamic one, so it will throw the error "the specified partition key field {field} not exist" when creating the collection. When `enable_dynamic_field` is set to `True`, all schema field creation based on `metadatas` is skipped. This code now checks whether `partition_key_field` is set and, if so, explicitly creates that field in the schema. Integration test added. **Twitter handle:** StuartMarshUK --------- Co-authored-by: Stuart Marsh Co-authored-by: Erick Friis --- .../langchain_milvus/vectorstores/milvus.py | 9 +++++- .../vectorstores/test_milvus.py | 29 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py index c33af2bfcd7..d048e8528b1 100644 --- a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py +++ b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py @@ -433,7 +433,14 @@ class Milvus(VectorStore): # ... 
# ``` if self.enable_dynamic_field: - pass + # If both dynamic fields and partition key field are enabled + if self._partition_key_field is not None: + # create the partition field + fields.append( + FieldSchema( + self._partition_key_field, DataType.VARCHAR, max_length=65_535 + ) + ) elif self._metadata_field is not None: fields.append(FieldSchema(self._metadata_field, DataType.JSON)) else: diff --git a/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py b/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py index a565cac81d4..7e9b8f76f9a 100644 --- a/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py +++ b/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py @@ -1,4 +1,5 @@ """Test Milvus functionality.""" + from typing import Any, List, Optional import pytest @@ -274,6 +275,34 @@ def test_milvus_metadata_field() -> None: } +def test_milvus_enable_dynamic_field_with_partition_key() -> None: + """ + Test end to end construction and enable dynamic field + with partition_key_field + """ + texts = ["foo", "bar", "baz"] + metadatas = [{"id": i, "namespace": f"name_{i}"} for i in range(len(texts))] + + docsearch = _milvus_from_texts( + metadatas=metadatas, enable_dynamic_field=True, partition_key_field="namespace" + ) + + # filter on a single namespace + output = docsearch.similarity_search("foo", k=10, expr="namespace == 'name_2'") + assert len(output) == 1 + + # without namespace filter + output = docsearch.similarity_search("foo", k=10) + assert len(output) == 3 + + assert set(docsearch.fields) == { + docsearch._primary_field, + docsearch._text_field, + docsearch._vector_field, + docsearch._partition_key_field, + } + + # if __name__ == "__main__": # test_milvus() # test_milvus_vector_search()