From b20230c8009ddc09819329fbf47b0d41221d0d2d Mon Sep 17 00:00:00 2001 From: Brian Sharon Date: Wed, 11 Dec 2024 16:49:35 -0700 Subject: [PATCH] community: use correct `id_key` when deleting by id in LanceDB wrapper (#28655) - **Description:** The current version of the `delete` method assumes that the id field will always be called `id`. This change builds the delete-by-id filter from the store's configured `id_key` (`self._id_key`) instead, so deleting by id targets the correct column when a custom `id_key` is used, and it adds an integration test that deletes by id from a store created with `id_key="pk"`. - **Issue:** n/a - **Dependencies:** n/a - **Twitter handle:** ugh, Twitter :D --- Thank you for contributing to LangChain! - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [x] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. 
If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Erick Friis --- .../langchain_community/vectorstores/lancedb.py | 2 +- .../integration_tests/vectorstores/test_lancedb.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py index bcde354e888..7253d885b11 100644 --- a/libs/community/langchain_community/vectorstores/lancedb.py +++ b/libs/community/langchain_community/vectorstores/lancedb.py @@ -676,7 +676,7 @@ class LanceDB(VectorStore): if filter: tbl.delete(filter) elif ids: - tbl.delete("id in ('{}')".format(",".join(ids))) + tbl.delete(f"{self._id_key} in ('{{}}')".format(",".join(ids))) elif drop_columns: if self.api_key is not None: raise NotImplementedError( diff --git a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py index 7152c93bfa1..7ba3a004663 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py @@ -84,6 +84,16 @@ def test_lancedb_delete() -> None: assert store.get_table().count_rows() == 2 +@pytest.mark.requires("lancedb") +def test_lancedb_delete_by_ids() -> None: + embeddings = FakeEmbeddings() + + store = LanceDB(embedding=embeddings, id_key="pk") + ids = store.add_texts(["text 1", "text 2", "item 3"]) + store.delete(ids=ids) + assert store.get_table().count_rows() == 0 + + @pytest.mark.requires("lancedb") def test_lancedb_all_searches() -> None: embeddings = FakeEmbeddings()