mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-25 13:07:58 +00:00
community[patch]: Added add_images method to SingleStoreDB vector store (#17871)
In this pull request, we introduce the add_images method to the SingleStoreDB vector store class, expanding its capabilities to handle multi-modal embeddings seamlessly. This method facilitates the incorporation of image data into the vector store by associating each image's URI with corresponding document content, metadata, and either pre-generated embeddings or embeddings computed using the embed_image method of the provided embedding object. The change includes integration tests validating the behavior of add_images. Additionally, we provide a notebook showcasing the usage of this new method. --------- Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com>
This commit is contained in:
@@ -303,6 +303,35 @@ class SingleStoreDB(VectorStore):
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def add_images(
    self,
    uris: List[str],
    metadatas: Optional[List[dict]] = None,
    embeddings: Optional[List[List[float]]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed images and add them to the vectorstore.

    Every URI is forwarded to ``add_texts`` as the document content, so the
    stored text of each record is the image's URI.

    Args:
        uris (List[str]): File paths of the images.
        metadatas (Optional[List[dict]], optional): Optional list of
            metadatas. Defaults to None.
        embeddings (Optional[List[List[float]]], optional): Optional
            pre-generated embeddings. When omitted and the configured
            embedding object exposes ``embed_image``, that method is
            called to compute them. Defaults to None.

    Returns:
        List[str]: the value returned by ``add_texts``.
    """
    if embeddings is None:
        # Only consult the embedding object when the caller supplied no
        # vectors; a missing ``embed_image`` leaves ``embeddings`` as None
        # and defers entirely to ``add_texts``.
        image_embedder = self.embedding
        if image_embedder is not None and hasattr(image_embedder, "embed_image"):
            embeddings = image_embedder.embed_image(uris=uris)

    return self.add_texts(uris, metadatas, embeddings, **kwargs)
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
|
@@ -1,4 +1,6 @@
|
||||
"""Test SingleStoreDB functionality."""
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
@@ -14,6 +16,7 @@ TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db"
|
||||
TEST_SINGLE_RESULT = [Document(page_content="foo")]
|
||||
TEST_SINGLE_WITH_METADATA_RESULT = [Document(page_content="foo", metadata={"a": "b"})]
|
||||
TEST_RESULT = [Document(page_content="foo"), Document(page_content="foo")]
|
||||
TEST_IMAGES_DIR = ""
|
||||
|
||||
try:
|
||||
import singlestoredb as s2
|
||||
@@ -22,6 +25,13 @@ try:
|
||||
except ImportError:
|
||||
singlestoredb_installed = False
|
||||
|
||||
try:
|
||||
from langchain_experimental.open_clip import OpenCLIPEmbeddings
|
||||
|
||||
langchain_experimental_installed = True
|
||||
except ImportError:
|
||||
langchain_experimental_installed = False
|
||||
|
||||
|
||||
def drop(table_name: str) -> None:
|
||||
with s2.connect(TEST_SINGLESTOREDB_URL) as conn:
|
||||
@@ -53,6 +63,9 @@ class RandomEmbeddings(Embeddings):
|
||||
def embed_query(self, text: str) -> List[float]:
    """Return a random 100-element vector; ``text`` itself is not inspected."""
    random_vector = np.random.rand(100)
    return random_vector.tolist()
|
||||
|
||||
def embed_image(self, uris: List[str]) -> List[List[float]]:
    """Return one random 100-element vector per URI; the images are not read."""
    vectors: List[List[float]] = []
    for _uri in uris:
        vectors.append(np.random.rand(100).tolist())
    return vectors
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def texts() -> List[str]:
|
||||
@@ -156,7 +169,7 @@ def test_singlestoredb_vector_index_large() -> None:
|
||||
table_name = "test_singlestoredb_vector_index_large"
|
||||
drop(table_name)
|
||||
docsearch = SingleStoreDB.from_texts(
|
||||
["foo"] * 300000,
|
||||
["foo"] * 30,
|
||||
RandomEmbeddings(),
|
||||
distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
|
||||
table_name=table_name,
|
||||
@@ -444,3 +457,51 @@ def test_singlestoredb_as_retriever(texts: List[str]) -> None:
|
||||
),
|
||||
]
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_add_image(texts: List[str]) -> None:
    """Test adding images.

    Three throwaway files stand in for images; their paths are added through
    ``add_images`` and a similarity search must return one of those paths as
    the page content.
    """
    table_name = "test_singlestoredb_add_image"
    drop(table_name)
    docsearch = SingleStoreDB(
        RandomEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    temp_files: List[str] = []
    try:
        for _ in range(3):
            # delete=False keeps the file on disk after the handle closes so
            # its path stays valid for add_images; it is removed below.
            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
                # Record the path first so cleanup still sees it if the
                # write fails.
                temp_files.append(temp_file.name)
                temp_file.write(b"foo")

        docsearch.add_images(temp_files)
        output = docsearch.similarity_search("foo", k=1)
        assert output[0].page_content in temp_files
    finally:
        # Remove the files even when an assertion fails, so repeated runs do
        # not accumulate files in the temp directory.
        for path in temp_files:
            os.remove(path)
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
@pytest.mark.skipif(
    not langchain_experimental_installed, reason="langchain_experimental not installed"
)
def test_singestoredb_add_image2() -> None:
    """Test adding real images embedded with OpenCLIP.

    Every ``.jpg`` file in ``TEST_IMAGES_DIR`` is added, then a text query for
    "horse" must return an image whose path contains "horse".
    """
    # TEST_IMAGES_DIR is an empty string unless configured, and
    # os.listdir("") raises FileNotFoundError. Skip before creating the
    # embedding model or touching the database.
    if not TEST_IMAGES_DIR or not os.path.isdir(TEST_IMAGES_DIR):
        pytest.skip("TEST_IMAGES_DIR is not set to a directory of .jpg images")

    table_name = "test_singlestoredb_add_images"
    drop(table_name)
    docsearch = SingleStoreDB(
        OpenCLIPEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    image_uris = sorted(
        os.path.join(TEST_IMAGES_DIR, image_name)
        for image_name in os.listdir(TEST_IMAGES_DIR)
        if image_name.endswith(".jpg")
    )
    docsearch.add_images(image_uris)
    output = docsearch.similarity_search("horse", k=1)
    assert "horse" in output[0].page_content
    drop(table_name)
|
||||
|
Reference in New Issue
Block a user