community[minor]: Added VLite as VectorStore (#20245)

Support [VLite](https://github.com/sdan/vlite) as a new VectorStore
type.

**Description**:
VLite is a simple and blazing-fast vector database (vdb) built with NumPy.
It abstracts much of the functionality around using a vdb in a
retrieval-augmented generation (RAG) pipeline, such as embedding
generation, chunking, and file processing, while still letting developers
change how these are created and stored.

**Before submitting**:
Added tests
[here](c09c2ebd5c/libs/community/tests/integration_tests/vectorstores/test_vlite.py)
Added ipython notebook
[here](c09c2ebd5c/docs/docs/integrations/vectorstores/vlite.ipynb)
Added simple docs on how to use
[here](c09c2ebd5c/docs/docs/integrations/providers/vlite.mdx)

**Profiles**

Maintainers: @sdan
Twitter handles: [@sdand](https://x.com/sdand)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
sdan
2024-04-16 18:24:38 -07:00
committed by GitHub
parent 7824291252
commit a7c5e41443
8 changed files with 560 additions and 0 deletions

View File

@@ -0,0 +1,88 @@
"""Test VLite functionality."""
from langchain_core.documents import Document
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import VLite
def test_vlite() -> None:
    """Build a VLite store from three texts and query it for one match.

    The store is created with ``VLite.from_texts`` and searched with
    ``k=1``; the single hit must equal a ``Document`` whose content is
    ``"foo"``.
    """
    # NOTE(review): FakeEmbeddings in langchain_community appears to declare
    # a required ``size`` field - confirm the no-argument call is valid here.
    fake_embedding = FakeEmbeddings()
    store = VLite.from_texts(
        texts=["foo", "bar", "baz"],
        embedding=fake_embedding,
    )

    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected
def test_vlite_with_metadatas() -> None:
    """Check that metadata supplied at construction comes back with the hit.

    Each text is tagged with ``{"page": "<index>"}``; a ``k=1`` search for
    ``"foo"`` must return that text together with ``{"page": "0"}``.
    """
    corpus = ["foo", "bar", "baz"]
    # One metadata dict per text, keyed by the text's position as a string.
    page_tags = [{"page": str(position)} for position, _ in enumerate(corpus)]

    store = VLite.from_texts(
        texts=corpus,
        embedding=FakeEmbeddings(),
        metadatas=page_tags,
    )
    hits = store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo", metadata={"page": "0"})]
    assert hits == expected
def test_vlite_with_metadatas_with_scores() -> None:
    """Check the scored search returns the document, its metadata and score.

    A ``k=1`` call to ``similarity_search_with_score`` for ``"foo"`` must
    yield exactly one ``(Document, score)`` pair with a score of ``0.0``.
    """
    corpus = ["foo", "bar", "baz"]
    page_tags = [{"page": str(position)} for position, _ in enumerate(corpus)]

    store = VLite.from_texts(
        texts=corpus,
        embedding=FakeEmbeddings(),
        metadatas=page_tags,
    )
    scored_hits = store.similarity_search_with_score("foo", k=1)

    # NOTE(review): this compares the score to 0.0 with exact float
    # equality - confirm the store really reports exactly 0.0 here.
    expected_doc = Document(page_content="foo", metadata={"page": "0"})
    expected = [(expected_doc, 0.0)]
    assert scored_hits == expected
def test_vlite_update_document() -> None:
    """Replace the document stored under id ``"1"`` and search for it.

    After ``update_document`` swaps in a document with content
    ``"updated_foo"``, a ``k=1`` search for that text must return it.
    """
    replacement_text = "updated_foo"
    store = VLite.from_texts(
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
        ids=["1", "2", "3"],
    )

    store.update_document("1", Document(page_content=replacement_text))
    hits = store.similarity_search(replacement_text, k=1)

    expected = [Document(page_content="updated_foo")]
    assert hits == expected
def test_vlite_delete_document() -> None:
    """Delete the entry with id ``"1"`` and confirm it is no longer returned.

    The search asks for ``k=3`` results so that every remaining entry can
    be inspected; a ``Document`` with content ``"foo"`` must not be present.
    """
    store = VLite.from_texts(
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
        ids=["1", "2", "3"],
    )

    store.delete(["1"])
    remaining_hits = store.similarity_search("foo", k=3)

    removed_doc = Document(page_content="foo")
    assert removed_doc not in remaining_hits
def test_vlite_get_documents() -> None:
    """Fetch two stored documents by id and compare them to the originals.

    Texts are stored under ids ``"1"``-``"3"`` with ``{"page": "<index>"}``
    metadata; ``get(ids=["1", "3"])`` must return the first and third
    documents, in that order, with their metadata.
    """
    corpus = ["foo", "bar", "baz"]
    page_tags = [{"page": str(position)} for position, _ in enumerate(corpus)]

    store = VLite.from_texts(
        texts=corpus,
        embedding=FakeEmbeddings(),
        metadatas=page_tags,
        ids=["1", "2", "3"],
    )
    fetched = store.get(ids=["1", "3"])

    first_doc = Document(page_content="foo", metadata={"page": "0"})
    third_doc = Document(page_content="baz", metadata={"page": "2"})
    assert fetched == [first_doc, third_doc]
def test_vlite_from_existing_index() -> None:
    """Populate a named collection, reopen it, and search the reopened store.

    The first store is created only for its effect on the collection and
    is discarded; the search runs against the instance returned by
    ``VLite.from_existing_index`` for the same collection name.
    """
    # NOTE(review): the collection name is fixed - presumably state can
    # carry over between runs if the collection persists; verify.
    collection_name = "test_collection"

    VLite.from_texts(
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
        collection=collection_name,
    )
    reopened_store = VLite.from_existing_index(
        collection=collection_name,
        embedding=FakeEmbeddings(),
    )

    hits = reopened_store.similarity_search("foo", k=1)

    expected = [Document(page_content="foo")]
    assert hits == expected

View File

@@ -89,6 +89,7 @@ EXPECTED_ALL = [
"Vectara",
"VectorStore",
"VespaStore",
"VLite",
"Weaviate",
"Yellowbrick",
"ZepVectorStore",

View File

@@ -88,6 +88,7 @@ def test_compatible_vectorstore_documentation() -> None:
"VDMS",
"Vearch",
"VespaStore",
"VLite",
"Weaviate",
"ZepVectorStore",
"Zilliz",

View File

@@ -82,6 +82,7 @@ _EXPECTED = [
"Vearch",
"Vectara",
"VespaStore",
"VLite",
"Weaviate",
"ZepVectorStore",
"Zilliz",