community[minor]: Added VLite as VectorStore (#20245)

Support [VLite](https://github.com/sdan/vlite) as a new VectorStore type. **Description**: vlite is a simple and blazing-fast vector database (vdb) built with numpy. It abstracts much of the functionality around using a vdb in a retrieval-augmented generation (RAG) pipeline, such as embedding generation, chunking, and file processing, while still letting developers change how these are made and stored. **Before submitting**: Added tests [here](c09c2ebd5c/libs/community/tests/integration_tests/vectorstores/test_vlite.py). Added an IPython notebook [here](c09c2ebd5c/docs/docs/integrations/vectorstores/vlite.ipynb). Added simple docs on how to use it [here](c09c2ebd5c/docs/docs/integrations/providers/vlite.mdx). **Profiles** Maintainers: @sdan. Twitter handles: [@sdand](https://x.com/sdand) --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-09-08 22:42:05 +00:00 · 2024-04-16 18:24:38 -07:00
parent 7824291252
commit a7c5e41443
8 changed files with 560 additions and 0 deletions
--- a/libs/community/tests/integration_tests/vectorstores/test_vlite.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_vlite.py
@@ -0,0 +1,88 @@
+"""Test VLite functionality."""
+
+from langchain_core.documents import Document
+
+from langchain_community.embeddings import FakeEmbeddings
+from langchain_community.vectorstores import VLite
+
+
+def test_vlite() -> None:
+    """Index three texts; the top-1 hit for 'foo' must be the 'foo' document."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = VLite.from_texts(texts=texts, embedding=FakeEmbeddings())
+    output = docsearch.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+def test_vlite_with_metadatas() -> None:
+    """Index texts with per-text 'page' metadata; the top hit must carry page '0'."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = VLite.from_texts(
+        texts=texts, embedding=FakeEmbeddings(), metadatas=metadatas
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo", metadata={"page": "0"})]
+
+
+def test_vlite_with_metadatas_with_scores() -> None:
+    """Search with scores; expect the 'foo' document, its metadata, and score 0.0."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = VLite.from_texts(
+        texts=texts, embedding=FakeEmbeddings(), metadatas=metadatas
+    )
+    output = docsearch.similarity_search_with_score("foo", k=1)
+    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
+
+
+def test_vlite_update_document() -> None:
+    """Update the document with id '1'; the new text must be the top-1 search hit."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = VLite.from_texts(
+        texts=texts, embedding=FakeEmbeddings(), ids=["1", "2", "3"]
+    )
+    docsearch.update_document("1", Document(page_content="updated_foo"))
+    output = docsearch.similarity_search("updated_foo", k=1)
+    assert output == [Document(page_content="updated_foo")]
+
+
+def test_vlite_delete_document() -> None:
+    """Delete id '1'; 'foo' must be absent from a k=3 search for 'foo'."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = VLite.from_texts(
+        texts=texts, embedding=FakeEmbeddings(), ids=["1", "2", "3"]
+    )
+    docsearch.delete(["1"])
+    output = docsearch.similarity_search("foo", k=3)
+    assert Document(page_content="foo") not in output
+
+
+def test_vlite_get_documents() -> None:
+    """Fetch ids '1' and '3' with get(); expect both documents with their metadata."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = VLite.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metadatas=metadatas,
+        ids=["1", "2", "3"],
+    )
+    output = docsearch.get(ids=["1", "3"])
+    assert output == [
+        Document(page_content="foo", metadata={"page": "0"}),
+        Document(page_content="baz", metadata={"page": "2"}),
+    ]
+
+
+def test_vlite_from_existing_index() -> None:
+    """Fill 'test_collection', reopen it with from_existing_index, and search it."""
+    texts = ["foo", "bar", "baz"]
+    VLite.from_texts(
+        texts=texts, embedding=FakeEmbeddings(), collection="test_collection"
+    )
+    new_docsearch = VLite.from_existing_index(
+        collection="test_collection", embedding=FakeEmbeddings()
+    )
+    output = new_docsearch.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
--- a/libs/community/tests/unit_tests/vectorstores/test_imports.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_imports.py
@@ -89,6 +89,7 @@ EXPECTED_ALL = [
    "Vectara",
    "VectorStore",
    "VespaStore",
+    "VLite",
    "Weaviate",
    "Yellowbrick",
    "ZepVectorStore",
--- a/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_indexing_docs.py
@@ -88,6 +88,7 @@ def test_compatible_vectorstore_documentation() -> None:
        "VDMS",
        "Vearch",
        "VespaStore",
+        "VLite",
        "Weaviate",
        "ZepVectorStore",
        "Zilliz",
--- a/libs/community/tests/unit_tests/vectorstores/test_public_api.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_public_api.py
@@ -82,6 +82,7 @@ _EXPECTED = [
    "Vearch",
    "Vectara",
    "VespaStore",
+    "VLite",
    "Weaviate",
    "ZepVectorStore",
    "Zilliz",