mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-31 00:29:57 +00:00
community[patch]: Added a function from_existing_collection
in Qdrant
vector database. (#20779)
Issue: #20514
The current implementation of `construct_instance` expects a `texts:
List[str]` argument, which triggers a call to the embedding function. This
might not be needed when we already have a client with a collection and
`path`, and you don't want to add any text.
This PR adds a class method that returns a qdrant instance with an
existing client.
Here, every time `construct_instance` is called, this line sends some text
for embedding generation:
cb6e5e56c2/libs/community/langchain_community/vectorstores/qdrant.py (L1592)
---------
Co-authored-by: Anush <anushshetty90@gmail.com>
This commit is contained in:
parent
893a924b90
commit
8c085fc697
@ -1367,6 +1367,51 @@ class Qdrant(VectorStore):
|
||||
qdrant.add_texts(texts, metadatas, ids, batch_size)
|
||||
return qdrant
|
||||
|
||||
@classmethod
def from_existing_collection(
    cls: Type[Qdrant],
    embedding: Embeddings,
    path: Optional[str] = None,
    collection_name: Optional[str] = None,
    location: Optional[str] = None,
    url: Optional[str] = None,
    port: Optional[int] = 6333,
    grpc_port: int = 6334,
    prefer_grpc: bool = False,
    https: Optional[bool] = None,
    api_key: Optional[str] = None,
    prefix: Optional[str] = None,
    timeout: Optional[float] = None,
    host: Optional[str] = None,
    **kwargs: Any,
) -> Qdrant:
    """Return a store bound to an existing Qdrant collection.

    No texts are embedded and nothing is inserted: the method only builds
    the sync/async clients via ``cls._generate_clients`` and wraps them in
    a new instance of ``cls``.

    Args:
        embedding: Embedding implementation handed to the constructor as
            ``embeddings``.
        path: Connection setting forwarded to ``cls._generate_clients``.
            Optional so that a deployment can be addressed through
            ``location``, ``url`` or ``host`` instead of a local path.
        collection_name: Name of the collection to attach to. Required,
            even though it carries a ``None`` default for signature
            compatibility with the now-optional ``path``.
        location: Forwarded unchanged to ``cls._generate_clients``.
        url: Forwarded unchanged to ``cls._generate_clients``.
        port: Forwarded unchanged to ``cls._generate_clients``.
        grpc_port: Forwarded unchanged to ``cls._generate_clients``.
        prefer_grpc: Forwarded unchanged to ``cls._generate_clients``.
        https: Forwarded unchanged to ``cls._generate_clients``.
        api_key: Forwarded unchanged to ``cls._generate_clients``.
        prefix: Forwarded unchanged to ``cls._generate_clients``.
        timeout: Forwarded unchanged to ``cls._generate_clients``.
        host: Forwarded unchanged to ``cls._generate_clients``.
        **kwargs: Extra keyword arguments. They are passed BOTH to
            ``cls._generate_clients`` and to the ``cls`` constructor, so
            every key must be acceptable to both.
            NOTE(review): confirm that client-only options are tolerated
            by the constructor (and vice versa).

    Returns:
        A new ``cls`` instance using the generated clients, the given
        ``collection_name`` and ``embedding``.

    Raises:
        ValueError: If ``collection_name`` is ``None``.
    """
    # Fail early with a clear message instead of building clients for a
    # collection that was never named.
    if collection_name is None:
        raise ValueError("Must specify collection_name. Received None.")

    client, async_client = cls._generate_clients(
        location=location,
        url=url,
        port=port,
        grpc_port=grpc_port,
        prefer_grpc=prefer_grpc,
        https=https,
        api_key=api_key,
        prefix=prefix,
        timeout=timeout,
        host=host,
        path=path,
        **kwargs,
    )
    return cls(
        client=client,
        async_client=async_client,
        collection_name=collection_name,
        embeddings=embedding,
        **kwargs,
    )
|
||||
|
||||
@classmethod
|
||||
@sync_call_fallback
|
||||
async def afrom_texts(
|
||||
|
@ -0,0 +1,39 @@
|
||||
import tempfile
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_community.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_existing_collection_uses_same_collection(vector_name: str) -> None:
    """Check that Qdrant.from_existing_collection attaches to the stored collection.

    One text is written through ``Qdrant.from_texts``; the same on-disk
    collection is then reopened with ``Qdrant.from_existing_collection`` and
    two more texts are added. A plain ``QdrantClient`` must afterwards count
    three points in that collection.
    """
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        storage_path = str(tmpdir)

        # Step 1: create the collection with a single point.
        seeded_store = Qdrant.from_texts(
            ["foo"],
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=collection_name,
            vector_name=vector_name,
        )
        # Drop the reference before the same path is opened again
        # (presumably this lets the local client release its storage; verify).
        del seeded_store

        # Step 2: reopen the existing collection and append two points.
        reopened_store = Qdrant.from_existing_collection(
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=collection_name,
            vector_name=vector_name,
        )
        reopened_store.add_texts(["baz", "bar"])
        del reopened_store

        # Step 3: count the points through an independent client.
        raw_client = QdrantClient(path=storage_path)
        point_total = raw_client.count(collection_name).count
        assert point_total == 3
|
Loading…
Reference in New Issue
Block a user