Mirror of https://github.com/hwchase17/langchain.git, synced 2025-06-24 15:43:54 +00:00
qdrant: init package (#21146)
## Description

This PR introduces the new `langchain-qdrant` partner package, intending to deprecate the community package.

## Changes

- Moved the Qdrant vector store implementation to `/libs/partners/qdrant` with integration tests.
- The conditional imports of the client library are now regular, with minor implementation improvements.
- Added a deprecation warning to `langchain_community.vectorstores.qdrant.Qdrant`.
- Replaced references/imports from `langchain_community` with either `langchain_core` or by moving the definitions to the `langchain_qdrant` package itself.
- Updated the Qdrant vector store documentation to reflect the changes.

## Testing

- [`QDRANT_URL`](583e36bf6b) and `QDRANT_API_KEY` env values need to be set to [run integration tests](d608c93d1f) in the [cloud](https://cloud.qdrant.tech).
- If a Qdrant instance is running at `http://localhost:6333`, the integration tests will use it too.
- By default, tests use an [`in-memory`](https://github.com/qdrant/qdrant-client?tab=readme-ov-file#local-mode) instance (not comprehensive). See the sketch below for wiring up the cloud env values.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Erick Friis <erickfriis@gmail.com>
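A minimal sketch of setting those env values and invoking the integration suite from `libs/partners/qdrant`; the cluster URL and key are placeholders, and the `pytest` invocation mirrors the package Makefile's `integration_tests` target:

```python
# Sketch only: point the integration tests at a Qdrant Cloud cluster.
# URL and key below are placeholders, not real credentials.
import os

import pytest

os.environ["QDRANT_URL"] = "https://xyz-example.eu-central.aws.cloud.qdrant.io:6333"
os.environ["QDRANT_API_KEY"] = "<YOUR_API_KEY>"

# Equivalent to `make integration_tests` / `poetry run pytest tests/integration_tests/`.
raise SystemExit(pytest.main(["tests/integration_tests/"]))
```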
This commit is contained in:
parent fe8c9d621a
commit edd68e4ad4
@@ -30,7 +30,7 @@
 },
 "outputs": [],
 "source": [
-"%pip install --upgrade --quiet qdrant-client"
+"%pip install --upgrade --quiet langchain-qdrant langchain-openai langchain"
 ]
 },
 {
@@ -79,8 +79,8 @@
 "outputs": [],
 "source": [
 "from langchain_community.document_loaders import TextLoader\n",
-"from langchain_community.vectorstores import Qdrant\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_qdrant import Qdrant\n",
 "from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
@@ -216,7 +216,7 @@
 "source": [
 "### Qdrant Cloud\n",
 "\n",
-"If you prefer not to keep yourself busy with managing the infrastructure, you can choose to set up a fully-managed Qdrant cluster on [Qdrant Cloud](https://cloud.qdrant.io/). There is a free forever 1GB cluster included for trying out. The main difference with using a managed version of Qdrant is that you'll need to provide an API key to secure your deployment from being accessed publicly."
+"If you prefer not to keep yourself busy with managing the infrastructure, you can choose to set up a fully-managed Qdrant cluster on [Qdrant Cloud](https://cloud.qdrant.io/). There is a free forever 1GB cluster included for trying out. The main difference with using a managed version of Qdrant is that you'll need to provide an API key to secure your deployment from being accessed publicly. The value can also be set in a `QDRANT_API_KEY` environment variable."
 ]
 },
 {
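For context, a minimal sketch of what connecting to a managed cluster looks like with the new package; the cluster URL is a placeholder and the single stand-in document replaces whatever the notebook loaded earlier:

```python
import os

from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import Qdrant

docs = [Document(page_content="Lorem ipsum")]  # stand-in for the notebook's documents

qdrant = Qdrant.from_documents(
    docs,
    OpenAIEmbeddings(),
    url="https://xyz-example.eu-central.aws.cloud.qdrant.io:6333",  # placeholder URL
    api_key=os.getenv("QDRANT_API_KEY"),  # falls back to the env var, as described
    collection_name="my_documents",
)
```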
@@ -243,6 +243,36 @@
 ")"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "825c7903",
+"metadata": {},
+"source": [
+"## Using an existing collection"
+]
+},
+{
+"cell_type": "markdown",
+"id": "3f772575",
+"metadata": {},
+"source": [
+"To get an instance of `langchain_qdrant.Qdrant` without loading any new documents or texts, you can use the `Qdrant.from_existing_collection()` method."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "daf7a6e5",
+"metadata": {},
+"outputs": [],
+"source": [
+"qdrant = Qdrant.from_existing_collection(\n",
+"    embeddings=embeddings,\n",
+"    collection_name=\"my_documents\",\n",
+"    url=\"http://localhost:6333\",\n",
+")"
+]
+},
 {
 "attachments": {},
 "cell_type": "markdown",
@@ -251,7 +281,7 @@
 "source": [
 "## Recreating the collection\n",
 "\n",
-"Both `Qdrant.from_texts` and `Qdrant.from_documents` methods are great to start using Qdrant with Langchain. In the previous versions the collection was recreated every time you called any of them. That behaviour has changed. Currently, the collection is going to be reused if it already exists. Setting `force_recreate` to `True` allows to remove the old collection and start from scratch."
+"The collection is reused if it already exists. Setting `force_recreate` to `True` allows to remove the old collection and start from scratch."
 ]
 },
 {
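A minimal sketch of the `force_recreate` behaviour described above, assuming a local instance and stand-in documents:

```python
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import Qdrant

docs = [Document(page_content="Lorem ipsum")]  # stand-in documents

# force_recreate=True drops any existing "my_documents" collection and rebuilds it;
# without it, the existing collection is reused as-is.
qdrant = Qdrant.from_documents(
    docs,
    OpenAIEmbeddings(),
    url="http://localhost:6333",
    collection_name="my_documents",
    force_recreate=True,
)
```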
@@ -520,7 +550,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 15,
+"execution_count": null,
 "id": "9427195f",
 "metadata": {
 "ExecuteTime": {
@@ -528,21 +558,9 @@
 "start_time": "2023-04-04T10:51:26.018763Z"
 }
 },
-"outputs": [
-{
-"data": {
-"text/plain": [
-"VectorStoreRetriever(vectorstore=<langchain_community.vectorstores.qdrant.Qdrant object at 0x7fc4e5720a00>, search_type='similarity', search_kwargs={})"
-]
-},
-"execution_count": 15,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
+"outputs": [],
 "source": [
-"retriever = qdrant.as_retriever()\n",
-"retriever"
+"retriever = qdrant.as_retriever()"
 ]
 },
 {
@@ -556,7 +574,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 16,
+"execution_count": null,
 "id": "64348f1b",
 "metadata": {
 "ExecuteTime": {
@@ -564,21 +582,9 @@
 "start_time": "2023-04-04T10:51:26.034284Z"
 }
 },
-"outputs": [
-{
-"data": {
-"text/plain": [
-"VectorStoreRetriever(vectorstore=<langchain_community.vectorstores.qdrant.Qdrant object at 0x7fc4e5720a00>, search_type='mmr', search_kwargs={})"
-]
-},
-"execution_count": 16,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
+"outputs": [],
 "source": [
-"retriever = qdrant.as_retriever(search_type=\"mmr\")\n",
-"retriever"
+"retriever = qdrant.as_retriever(search_type=\"mmr\")"
 ]
 },
 {
@@ -678,7 +684,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 19,
+"execution_count": null,
 "id": "e4d6baf9",
 "metadata": {
 "ExecuteTime": {
@@ -686,18 +692,7 @@
 "start_time": "2023-04-04T11:08:30.229748Z"
 }
 },
-"outputs": [
-{
-"data": {
-"text/plain": [
-"<langchain_community.vectorstores.qdrant.Qdrant at 0x7fc4e2baa230>"
-]
-},
-"execution_count": 19,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
+"outputs": [],
 "source": [
 "Qdrant.from_documents(\n",
 "    docs,\n",
@@ -22,6 +22,7 @@ from typing import (
 )

 import numpy as np
+from langchain_core._api.deprecation import deprecated
 from langchain_core.embeddings import Embeddings
 from langchain_core.runnables.config import run_in_executor
 from langchain_core.vectorstores import VectorStore
@@ -65,6 +66,9 @@ def sync_call_fallback(method: Callable) -> Callable:
     return wrapper


+@deprecated(
+    since="0.0.37", removal="0.3.0", alternative_import="langchain_qdrant.Qdrant"
+)
 class Qdrant(VectorStore):
     """`Qdrant` vector store.

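For downstream users, the migration implied by this deprecation is a one-line import change:

```python
# Before: still works until 0.3.0, but now emits a deprecation warning.
# from langchain_community.vectorstores import Qdrant

# After: the new partner package added in this PR.
from langchain_qdrant import Qdrant
```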
libs/partners/qdrant/.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
__pycache__
libs/partners/qdrant/LICENSE (new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
libs/partners/qdrant/Makefile (new file, 60 lines)
@@ -0,0 +1,60 @@
.PHONY: all format lint test tests integration_test integration_tests help

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/

integration_test integration_tests: TEST_FILE = tests/integration_tests/

test tests integration_test integration_tests:
	poetry run pytest $(TEST_FILE)


######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/qdrant --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_qdrant
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	poetry run ruff .
	poetry run ruff format $(PYTHON_FILES) --diff
	poetry run ruff --select I $(PYTHON_FILES)
	mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	poetry run ruff format $(PYTHON_FILES)
	poetry run ruff --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

spell_fix:
	poetry run codespell --toml pyproject.toml -w

check_imports: $(shell find langchain_qdrant -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports - check imports'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
	@echo 'integration_test - run integration tests'
	@echo 'integration_tests - run integration tests'
libs/partners/qdrant/README.md (new file, 25 lines)
@@ -0,0 +1,25 @@
# langchain-qdrant

This package contains the LangChain integration with [Qdrant](https://qdrant.tech/).

## Installation

```bash
pip install -U langchain-qdrant
```

## Usage

The `Qdrant` class exposes the connection to the Qdrant vector store.

```python
from langchain_qdrant import Qdrant

embeddings = ...  # use a LangChain Embeddings class

vectorstore = Qdrant.from_existing_collection(
    embeddings=embeddings,
    collection_name="<COLLECTION_NAME>",
    url="http://localhost:6333",
)
```
libs/partners/qdrant/langchain_qdrant/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from langchain_qdrant.vectorstores import Qdrant

__all__ = ["Qdrant"]
libs/partners/qdrant/langchain_qdrant/_utils.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from typing import List, Union

import numpy as np

Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]


def maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Calculate maximal marginal relevance."""
    if min(k, len(embedding_list)) <= 0:
        return []
    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)
    similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0]
    most_similar = int(np.argmax(similarity_to_query))
    idxs = [most_similar]
    selected = np.array([embedding_list[most_similar]])
    while len(idxs) < min(k, len(embedding_list)):
        best_score = -np.inf
        idx_to_add = -1
        similarity_to_selected = cosine_similarity(embedding_list, selected)
        for i, query_score in enumerate(similarity_to_query):
            if i in idxs:
                continue
            redundant_score = max(similarity_to_selected[i])
            equation_score = (
                lambda_mult * query_score - (1 - lambda_mult) * redundant_score
            )
            if equation_score > best_score:
                best_score = equation_score
                idx_to_add = i
        idxs.append(idx_to_add)
        selected = np.append(selected, [embedding_list[idx_to_add]], axis=0)
    return idxs


def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices."""
    if len(X) == 0 or len(Y) == 0:
        return np.array([])

    X = np.array(X)
    Y = np.array(Y)
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            f"Number of columns in X and Y must be the same. X has shape {X.shape} "
            f"and Y has shape {Y.shape}."
        )
    try:
        import simsimd as simd  # type: ignore

        X = np.array(X, dtype=np.float32)
        Y = np.array(Y, dtype=np.float32)
        Z = 1 - simd.cdist(X, Y, metric="cosine")
        if isinstance(Z, float):
            return np.array([Z])
        return np.array(Z)
    except ImportError:
        X_norm = np.linalg.norm(X, axis=1)
        Y_norm = np.linalg.norm(Y, axis=1)
        # Ignore divide by zero errors run time warnings as those are handled below.
        with np.errstate(divide="ignore", invalid="ignore"):
            similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
        similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
        return similarity
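A quick sanity check of the MMR helper above with toy vectors (not part of the diff; the private import path matches the module this file adds):

```python
import numpy as np

from langchain_qdrant._utils import maximal_marginal_relevance

# Toy 2-D embeddings: an exact duplicate of the best match, plus a distinct vector.
query = np.array([1.0, 0.0])
candidates = [
    np.array([1.0, 0.0]),  # best match
    np.array([1.0, 0.0]),  # duplicate of the best match
    np.array([0.7, 0.7]),  # relevant but pointing in a different direction
]

# A low lambda_mult weights diversity over relevance, so the duplicate is skipped.
print(maximal_marginal_relevance(query, candidates, lambda_mult=0.1, k=2))  # -> [0, 2]
```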
libs/partners/qdrant/langchain_qdrant/py.typed (new file, empty)

libs/partners/qdrant/langchain_qdrant/vectorstores.py (new file, 2253 lines)
File diff suppressed because it is too large.

libs/partners/qdrant/poetry.lock (generated, new file, 1268 lines)
File diff suppressed because it is too large.
libs/partners/qdrant/pyproject.toml (new file, 95 lines)
@@ -0,0 +1,95 @@
[tool.poetry]
name = "langchain-qdrant"
version = "0.0.1"
description = "An integration package connecting Qdrant and LangChain"
authors = []
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
license = "MIT"

[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/qdrant"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = ">=0.1.52,<0.3"
qdrant-client = "^1.9.0"

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
pytest = "^7.3.0"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
syrupy = "^4.0.2"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
langchain-core = {path = "../../core", develop = true}
requests = "^2.31.0"

[tool.poetry.group.codespell]
optional = true

[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0"

[tool.poetry.group.test_integration]
optional = true

[tool.poetry.group.test_integration.dependencies]

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
ruff = "^0.1.5"

[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
langchain-core = {path = "../../core", develop = true}

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
langchain-core = {path = "../../core", develop = true}

[tool.ruff]
select = [
    "E",  # pycodestyle
    "F",  # pyflakes
    "I",  # isort
]

[tool.mypy]
disallow_untyped_defs = true

[tool.coverage.run]
omit = [
    "tests/*",
]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# https://docs.pytest.org/en/7.1.x/reference/reference.html
# --strict-config: any warnings encountered while parsing the `pytest`
# section of the configuration file raise errors.
#
# https://github.com/tophat/syrupy
# --snapshot-warn-unused: prints a warning on unused snapshots rather than fail the test suite.
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
    "requires: mark tests as requiring a specific library",
    "asyncio: mark tests as requiring asyncio",
    "compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
libs/partners/qdrant/scripts/check_imports.py (new file, 17 lines)
@@ -0,0 +1,17 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader

if __name__ == "__main__":
    files = sys.argv[1:]
    has_failure = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            # Fixed typo: was `has_faillure`, which silently left has_failure False.
            has_failure = True
            print(file)
            traceback.print_exc()
            print()

    sys.exit(1 if has_failure else 0)
libs/partners/qdrant/scripts/check_pydantic.sh (executable, new file, 27 lines)
@@ -0,0 +1,27 @@
#!/bin/bash
#
# This script searches for lines starting with "import pydantic" or "from pydantic"
# in tracked files within a Git repository.
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository

# Check if a path argument is provided
if [ $# -ne 1 ]; then
    echo "Usage: $0 /path/to/repository"
    exit 1
fi

repository_path="$1"

# Search for lines matching the pattern within the specified repository
result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')

# Check if any matching lines were found
if [ -n "$result" ]; then
    echo "ERROR: The following lines need to be updated:"
    echo "$result"
    echo "Please replace the code with an import from langchain_core.pydantic_v1."
    echo "For example, replace 'from pydantic import BaseModel'"
    echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
    exit 1
fi
libs/partners/qdrant/scripts/lint_imports.sh (executable, new file, 17 lines)
@@ -0,0 +1,17 @@
#!/bin/bash

set -eu

# Initialize a variable to keep track of errors
errors=0

# make sure not importing from langchain or langchain_experimental
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))

# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then
    exit 1
else
    exit 0
fi
libs/partners/qdrant/tests/__init__.py (new file, empty)
@@ -0,0 +1,123 @@
import os
import uuid
from typing import Optional

import pytest

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings
from tests.integration_tests.fixtures import qdrant_locations

API_KEY = os.getenv("QDRANT_API_KEY")


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_returns_all_ids(
    batch_size: int, qdrant_location: str
) -> None:
    """Test end to end Qdrant.aadd_texts returns unique ids."""
    docsearch: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        batch_size=batch_size,
        location=qdrant_location,
    )

    ids = await docsearch.aadd_texts(["foo", "bar", "baz"])
    assert 3 == len(ids)
    assert 3 == len(set(ids))


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_duplicated_texts(
    vector_name: Optional[str], qdrant_location: str
) -> None:
    """Test end to end Qdrant.aadd_texts stores duplicated texts separately."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    collection_name = uuid.uuid4().hex
    vectors_config = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    if vector_name is not None:
        vectors_config = {vector_name: vectors_config}  # type: ignore[assignment]
    client.recreate_collection(collection_name, vectors_config=vectors_config)

    vec_store = Qdrant(
        client,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    ids = await vec_store.aadd_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert 2 == len(set(ids))
    assert 2 == client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_ids(
    batch_size: int, qdrant_location: str
) -> None:
    """Test end to end Qdrant.aadd_texts stores provided ids."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    collection_name = uuid.uuid4().hex
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = await vec_store.aadd_texts(
        ["abc", "def"], ids=ids, batch_size=batch_size
    )

    assert all(first == second for first, second in zip(ids, returned_ids))
    assert 2 == client.count(collection_name).count
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)


@pytest.mark.parametrize("vector_name", ["custom-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_aadd_texts_stores_embeddings_as_named_vectors(
    vector_name: str, qdrant_location: str
) -> None:
    """Test end to end Qdrant.aadd_texts stores named vectors if name is provided."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    collection_name = uuid.uuid4().hex

    client = QdrantClient(location=qdrant_location, api_key=API_KEY)
    client.recreate_collection(
        collection_name,
        vectors_config={
            vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
        },
    )

    vec_store = Qdrant(
        client,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    await vec_store.aadd_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert 5 == client.count(collection_name).count
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in client.scroll(collection_name, with_vectors=True)[0]
    )
@@ -0,0 +1,266 @@
import os
import uuid
from typing import Optional

import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from langchain_qdrant.vectorstores import QdrantException
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)
from tests.integration_tests.fixtures import (
    qdrant_locations,
)


@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_duplicated_texts(qdrant_location: str) -> None:
    """Test end to end Qdrant.afrom_texts stores duplicated texts separately."""
    collection_name = uuid.uuid4().hex

    vec_store = await Qdrant.afrom_texts(
        ["abc", "abc"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        location=qdrant_location,
    )

    client = vec_store.client
    assert 2 == client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str], qdrant_location: str
) -> None:
    """Test end to end Qdrant.afrom_texts stores provided ids."""
    collection_name = uuid.uuid4().hex
    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]
    vec_store = await Qdrant.afrom_texts(
        ["abc", "def"],
        ConsistentFakeEmbeddings(),
        ids=ids,
        collection_name=collection_name,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )

    client = vec_store.client
    assert 2 == client.count(collection_name).count
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)


@pytest.mark.parametrize("vector_name", ["custom-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
    vector_name: str,
    qdrant_location: str,
) -> None:
    """Test end to end Qdrant.afrom_texts stores named vectors if name is provided."""
    collection_name = uuid.uuid4().hex

    vec_store = await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(),
        collection_name=collection_name,
        vector_name=vector_name,
        location=qdrant_location,
    )

    client = vec_store.client
    assert 5 == client.count(collection_name).count
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in client.scroll(collection_name, with_vectors=True)[0]
    )


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_reuses_same_collection(
    location: str, vector_name: Optional[str]
) -> None:
    """Test if Qdrant.afrom_texts reuses the same collection"""
    collection_name = uuid.uuid4().hex
    embeddings = ConsistentFakeEmbeddings()

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        embeddings,
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    vec_store = await Qdrant.afrom_texts(
        ["foo", "bar"],
        embeddings,
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    client = vec_store.client
    assert 7 == client.count(collection_name).count


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    location: str,
    vector_name: Optional[str],
) -> None:
    """Test if Qdrant.afrom_texts raises an exception if dimensionality does not
    match"""
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            vector_name=vector_name,
            location=location,
        )


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
async def test_qdrant_from_texts_raises_error_on_different_vector_name(
    location: str,
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Test if Qdrant.afrom_texts raises an exception if vector name does not match"""
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        vector_name=first_vector_name,
        location=location,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            vector_name=second_vector_name,
            location=location,
        )


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
async def test_qdrant_from_texts_raises_error_on_different_distance(
    location: str,
) -> None:
    """Test if Qdrant.afrom_texts raises an exception if distance does not match"""
    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        distance_func="Cosine",
        location=location,
    )

    with pytest.raises(QdrantException):
        await Qdrant.afrom_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            distance_func="Euclid",
            location=location,
        )


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
async def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    location: str,
    vector_name: Optional[str],
) -> None:
    """Test if Qdrant.afrom_texts recreates the collection even if config mismatches"""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex

    await Qdrant.afrom_texts(
        ["lorem", "ipsum", "dolor", "sit", "amet"],
        ConsistentFakeEmbeddings(dimensionality=10),
        collection_name=collection_name,
        vector_name=vector_name,
        location=location,
    )

    await Qdrant.afrom_texts(
        ["foo", "bar"],
        ConsistentFakeEmbeddings(dimensionality=5),
        collection_name=collection_name,
        vector_name=vector_name,
        force_recreate=True,
        location=location,
    )

    client = QdrantClient(location=location, api_key=os.getenv("QDRANT_API_KEY"))
    assert 2 == client.count(collection_name).count
    vector_params = client.get_collection(collection_name).config.params.vectors
    if vector_name is not None:
        vector_params = vector_params[vector_name]  # type: ignore[index]
    assert 5 == vector_params.size  # type: ignore[union-attr]


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_from_texts_stores_metadatas(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = await Qdrant.afrom_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        location=qdrant_location,
    )
    output = await docsearch.asimilarity_search("foo", k=1)
    assert_documents_equals(
        output, [Document(page_content="foo", metadata={"page": 0})]
    )
@@ -0,0 +1,51 @@
from typing import Optional

import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)
from tests.integration_tests.fixtures import (
    qdrant_locations,
)


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and MMR search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
        distance_func="EUCLID",  # Euclid distance used to avoid normalization
    )
    output = await docsearch.amax_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0
    )
    assert_documents_equals(
        output,
        [
            Document(page_content="foo", metadata={"page": 0}),
            Document(page_content="baz", metadata={"page": 2}),
        ],
    )
@@ -0,0 +1,305 @@
from typing import Optional

import numpy as np
import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)
from tests.integration_tests.fixtures import qdrant_locations


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    output = await docsearch.asimilarity_search("foo", k=1)
    assert_documents_equals(output, [Document(page_content="foo")])


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    embeddings = ConsistentFakeEmbeddings().embed_query("foo")
    output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
    assert_documents_equals(output, [Document(page_content="foo")])


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    embeddings = ConsistentFakeEmbeddings().embed_query("foo")
    output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
    assert len(output) == 1
    document, score = output[0]
    assert_documents_equals([document], [Document(page_content="foo")])
    assert score >= 0


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str], qdrant_location: str
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )

    output = await docsearch.asimilarity_search(
        "foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
    )
    assert_documents_equals(
        output,
        [
            Document(
                page_content="bar",
                metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
            )
        ],
    )


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
        location=qdrant_location,
    )
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )
    assert len(output) == 3
    for i in range(len(output)):
        assert round(output[i][1], 2) >= 0
        assert round(output[i][1], 2) <= 1


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
        location=qdrant_location,
    )

    score_threshold = 0.98
    kwargs = {"score_threshold": score_threshold}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, **kwargs
    )
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
        location=qdrant_location,
    )
    score_threshold = 0.99  # for almost exact match
    # test negative filter condition
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 0
    # test positive filter condition
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
    output = await docsearch.asimilarity_search_with_relevance_scores(
        "foo", k=3, **kwargs
    )
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    from qdrant_client.http import models as rest

    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        vector_name=vector_name,
        location=qdrant_location,
    )

    qdrant_filter = rest.Filter(
        must=[
            rest.FieldCondition(
                key="metadata.page",
                match=rest.MatchValue(value=1),
            ),
            rest.FieldCondition(
                key="metadata.details.page",
                match=rest.MatchValue(value=2),
            ),
            rest.FieldCondition(
                key="metadata.details.pages",
                match=rest.MatchAny(any=[3]),
            ),
        ]
    )
    output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
    assert_documents_equals(
        output,
        [
            Document(
                page_content="bar",
                metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
            )
        ],
    )


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
async def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: str,
    qdrant_location: str,
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        location=qdrant_location,
    )
    output = await docsearch.asimilarity_search_with_relevance_scores("foo", k=3)

    assert all(
        (1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
    )
libs/partners/qdrant/tests/integration_tests/common.py (new file, 79 lines)
@@ -0,0 +1,79 @@
from typing import List

import requests  # type: ignore
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings


def qdrant_running_locally() -> bool:
    """Check if Qdrant is running at http://localhost:6333."""

    try:
        response = requests.get("http://localhost:6333", timeout=10.0)
        response_json = response.json()
        return response_json.get("title") == "qdrant - vector search engine"
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        return False


def assert_documents_equals(actual: List[Document], expected: List[Document]):  # type: ignore[no-untyped-def]
    assert len(actual) == len(expected)

    for actual_doc, expected_doc in zip(actual, expected):
        assert actual_doc.page_content == expected_doc.page_content

        assert "_id" in actual_doc.metadata
        assert "_collection_name" in actual_doc.metadata

        actual_doc.metadata.pop("_id")
        actual_doc.metadata.pop("_collection_name")

        assert actual_doc.metadata == expected_doc.metadata


class FakeEmbeddings(Embeddings):
    """Fake embeddings functionality for testing."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return simple embeddings.
        Embeddings encode each text as its index."""
        return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        return self.embed_documents(texts)

    def embed_query(self, text: str) -> List[float]:
        """Return constant query embeddings.
        Embeddings are identical to embed_documents(texts)[0].
        Distance to each text will be that text's index,
        as it was passed to embed_documents."""
        return [float(1.0)] * 9 + [float(0.0)]

    async def aembed_query(self, text: str) -> List[float]:
        return self.embed_query(text)


class ConsistentFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings which remember all the texts seen so far to return consistent
    vectors for the same texts."""

    def __init__(self, dimensionality: int = 10) -> None:
        self.known_texts: List[str] = []
        self.dimensionality = dimensionality

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return consistent embeddings for each text seen so far."""
        out_vectors = []
        for text in texts:
            if text not in self.known_texts:
                self.known_texts.append(text)
            vector = [float(1.0)] * (self.dimensionality - 1) + [
                float(self.known_texts.index(text))
            ]
            out_vectors.append(vector)
        return out_vectors

    def embed_query(self, text: str) -> List[float]:
        """Return consistent embeddings for the text, if seen before, or a constant
        one if the text is unknown."""
        return self.embed_documents([text])[0]
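Why the tests can rely on these fakes: the same text always maps to the same vector, regardless of call order. A quick illustration (not part of the diff):

```python
from tests.integration_tests.common import ConsistentFakeEmbeddings

emb = ConsistentFakeEmbeddings(dimensionality=10)
first = emb.embed_documents(["foo", "bar"])
second = emb.embed_documents(["bar", "foo"])

# "foo" keeps index 0 and "bar" keeps index 1 across calls,
# so the vectors stay stable for the whole test session.
assert first[0] == second[1] and first[1] == second[0]
```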
libs/partners/qdrant/tests/integration_tests/conftest.py (new file, 15 lines)
@@ -0,0 +1,15 @@
import os

from qdrant_client import QdrantClient

from tests.integration_tests.fixtures import qdrant_locations


def pytest_sessionfinish() -> None:
    """Clean up all collections after the test session."""
    for location in qdrant_locations():
        client = QdrantClient(location=location, api_key=os.getenv("QDRANT_API_KEY"))
        collections = client.get_collections().collections

        for collection in collections:
            client.delete_collection(collection.name)
libs/partners/qdrant/tests/integration_tests/fixtures.py (new file, 25 lines)
@@ -0,0 +1,25 @@
import logging
import os
from typing import List

from tests.integration_tests.common import qdrant_running_locally

logger = logging.getLogger(__name__)


def qdrant_locations(use_in_memory: bool = True) -> List[str]:
    locations = []

    if use_in_memory:
        logger.info("Running Qdrant tests with in-memory mode.")
        locations.append(":memory:")

    if qdrant_running_locally():
        logger.info("Running Qdrant tests with local Qdrant instance.")
        locations.append("http://localhost:6333")

    if qdrant_url := os.getenv("QDRANT_URL"):
        logger.info(f"Running Qdrant tests with Qdrant instance at {qdrant_url}.")
        locations.append(qdrant_url)

    return locations
libs/partners/qdrant/tests/integration_tests/test_add_texts.py (new file, 135 lines)
@@ -0,0 +1,135 @@
import uuid
from typing import Optional

import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_documents_extends_existing_collection(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch: Qdrant = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    new_texts = ["foobar", "foobaz"]
    docsearch.add_documents(
        [Document(page_content=content) for content in new_texts],
        batch_size=batch_size,
    )
    output = docsearch.similarity_search("foobar", k=1)
    # ConsistentFakeEmbeddings return the same query embedding as the first document
    # embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
    # same as "foobar" embedding
    assert_documents_equals(output, [Document(page_content="foobar")])


@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_returns_all_ids(batch_size: int) -> None:
    """Test end to end Qdrant.add_texts returns unique ids."""
    docsearch: Qdrant = Qdrant.from_texts(
        ["foobar"],
        ConsistentFakeEmbeddings(),
        location=":memory:",
        batch_size=batch_size,
    )

    ids = docsearch.add_texts(["foo", "bar", "baz"])
    assert 3 == len(ids)
    assert 3 == len(set(ids))


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_add_texts_stores_duplicated_texts(vector_name: Optional[str]) -> None:
    """Test end to end Qdrant.add_texts stores duplicated texts separately."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex
    vectors_config = rest.VectorParams(size=10, distance=rest.Distance.COSINE)
    if vector_name is not None:
        vectors_config = {vector_name: vectors_config}  # type: ignore[assignment]
    client.recreate_collection(collection_name, vectors_config=vectors_config)

    vec_store = Qdrant(
        client,
        collection_name,
        embeddings=ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    ids = vec_store.add_texts(["abc", "abc"], [{"a": 1}, {"a": 2}])

    assert 2 == len(set(ids))
    assert 2 == client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
def test_qdrant_add_texts_stores_ids(batch_size: int) -> None:
    """Test end to end Qdrant.add_texts stores provided ids."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex
    client.recreate_collection(
        collection_name,
        vectors_config=rest.VectorParams(size=10, distance=rest.Distance.COSINE),
    )

    vec_store = Qdrant(client, collection_name, ConsistentFakeEmbeddings())
    returned_ids = vec_store.add_texts(["abc", "def"], ids=ids, batch_size=batch_size)

    assert all(first == second for first, second in zip(ids, returned_ids))
    assert 2 == client.count(collection_name).count
    stored_ids = [point.id for point in client.scroll(collection_name)[0]]
    assert set(ids) == set(stored_ids)


@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_add_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Test end to end Qdrant.add_texts stores named vectors if name is provided."""
    from qdrant_client import QdrantClient
    from qdrant_client.http import models as rest

    collection_name = uuid.uuid4().hex

    client = QdrantClient(":memory:")
    client.recreate_collection(
        collection_name,
        vectors_config={
            vector_name: rest.VectorParams(size=10, distance=rest.Distance.COSINE)
        },
    )

    vec_store = Qdrant(
        client,
        collection_name,
        ConsistentFakeEmbeddings(),
        vector_name=vector_name,
    )
    vec_store.add_texts(["lorem", "ipsum", "dolor", "sit", "amet"])

    assert 5 == client.count(collection_name).count
    assert all(
        vector_name in point.vector  # type: ignore[operator]
        for point in client.scroll(collection_name, with_vectors=True)[0]
    )
@@ -0,0 +1,7 @@
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
    pass
@@ -0,0 +1,58 @@
import uuid
from typing import Callable, Optional

import pytest
from langchain_core.embeddings import Embeddings

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings


@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), None),
        (ConsistentFakeEmbeddings().embed_query, None),
        (None, ConsistentFakeEmbeddings().embed_query),
    ],
)
def test_qdrant_embedding_interface(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Test Qdrant may accept different types for embeddings."""
    from qdrant_client import QdrantClient

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex

    Qdrant(
        client,
        collection_name,
        embeddings=embeddings,
        embedding_function=embedding_function,
    )


@pytest.mark.parametrize(
    ["embeddings", "embedding_function"],
    [
        (ConsistentFakeEmbeddings(), ConsistentFakeEmbeddings().embed_query),
        (None, None),
    ],
)
def test_qdrant_embedding_interface_raises_value_error(
    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
) -> None:
    """Test Qdrant requires only one method for embeddings."""
    from qdrant_client import QdrantClient

    client = QdrantClient(":memory:")
    collection_name = uuid.uuid4().hex

    with pytest.raises(ValueError):
        Qdrant(
            client,
            collection_name,
            embeddings=embeddings,
            embedding_function=embedding_function,
        )
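The two tests above pin down the constructor contract: exactly one of `embeddings` (an `Embeddings` implementation or a bare query-embedding callable) or the legacy `embedding_function` may be given; supplying both or neither raises `ValueError`. A minimal sketch of the three accepted spellings (the "demo" collection name is illustrative, and, as the tests show, the constructor does not require the collection to exist yet):

from qdrant_client import QdrantClient

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings

client = QdrantClient(":memory:")

# Any one of these works; both-set or both-None raises ValueError.
Qdrant(client, "demo", embeddings=ConsistentFakeEmbeddings())
Qdrant(client, "demo", embeddings=ConsistentFakeEmbeddings().embed_query)
Qdrant(client, "demo", embedding_function=ConsistentFakeEmbeddings().embed_query)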
@@ -0,0 +1,37 @@
import tempfile
import uuid

import pytest

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings


@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_existing_collection_uses_same_collection(vector_name: str) -> None:
    """Test if the Qdrant.from_existing_collection reuses the same collection."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        docs = ["foo"]
        qdrant = Qdrant.from_texts(
            docs,
            embedding=ConsistentFakeEmbeddings(),
            path=str(tmpdir),
            collection_name=collection_name,
            vector_name=vector_name,
        )
        del qdrant

        qdrant = Qdrant.from_existing_collection(
            embedding=ConsistentFakeEmbeddings(),
            path=str(tmpdir),
            collection_name=collection_name,
            vector_name=vector_name,
        )
        qdrant.add_texts(["baz", "bar"])
        del qdrant

        client = QdrantClient(path=str(tmpdir))
        assert 3 == client.count(collection_name).count
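This is the reconnect pattern for data written in an earlier session: `from_existing_collection` attaches to the stored points without ingesting anything new. A sketch under assumed names (the path, collection name, and vector name below are illustrative and must match whatever was used at creation time):

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings

qdrant = Qdrant.from_existing_collection(
    embedding=ConsistentFakeEmbeddings(),
    path="/tmp/qdrant-data",  # illustrative on-disk location
    collection_name="my-collection",  # illustrative name
    vector_name="custom-vector",
)
qdrant.add_texts(["more", "texts"])  # extends the existing collection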
288  libs/partners/qdrant/tests/integration_tests/test_from_texts.py  Normal file
@@ -0,0 +1,288 @@
import tempfile
import uuid
from typing import Optional

import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from langchain_qdrant.vectorstores import QdrantException
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)
from tests.integration_tests.fixtures import qdrant_locations


def test_qdrant_from_texts_stores_duplicated_texts() -> None:
    """Test end to end Qdrant.from_texts stores duplicated texts separately."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex

    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["abc", "abc"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=str(tmpdir),
        )
        del vec_store

        client = QdrantClient(path=str(tmpdir))
        assert 2 == client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_from_texts_stores_ids(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Test end to end Qdrant.from_texts stores provided ids."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        ids = [
            "fa38d572-4c31-4579-aedc-1960d79df6df",
            "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
        ]
        vec_store = Qdrant.from_texts(
            ["abc", "def"],
            ConsistentFakeEmbeddings(),
            ids=ids,
            collection_name=collection_name,
            path=str(tmpdir),
            batch_size=batch_size,
            vector_name=vector_name,
        )
        del vec_store

        client = QdrantClient(path=str(tmpdir))
        assert 2 == client.count(collection_name).count
        stored_ids = [point.id for point in client.scroll(collection_name)[0]]
        assert set(ids) == set(stored_ids)


@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_texts_stores_embeddings_as_named_vectors(vector_name: str) -> None:
    """Test end to end Qdrant.from_texts stores named vectors if name is provided."""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
        )
        del vec_store

        client = QdrantClient(path=str(tmpdir))
        assert 5 == client.count(collection_name).count
        assert all(
            vector_name in point.vector  # type: ignore[operator]
            for point in client.scroll(collection_name, with_vectors=True)[0]
        )


@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_reuses_same_collection(vector_name: Optional[str]) -> None:
    """Test if Qdrant.from_texts reuses the same collection"""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    embeddings = ConsistentFakeEmbeddings()
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            embeddings,
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
        )
        del vec_store

        vec_store = Qdrant.from_texts(
            ["foo", "bar"],
            embeddings,
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
        )
        del vec_store

        client = QdrantClient(path=str(tmpdir))
        assert 7 == client.count(collection_name).count


@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_raises_error_on_different_dimensionality(
    vector_name: Optional[str],
) -> None:
    """Test if Qdrant.from_texts raises an exception if dimensionality does not match"""
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
        )
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=5),
                collection_name=collection_name,
                path=str(tmpdir),
                vector_name=vector_name,
            )


@pytest.mark.parametrize(
    ["first_vector_name", "second_vector_name"],
    [
        (None, "custom-vector"),
        ("custom-vector", None),
        ("my-first-vector", "my-second_vector"),
    ],
)
def test_qdrant_from_texts_raises_error_on_different_vector_name(
    first_vector_name: Optional[str],
    second_vector_name: Optional[str],
) -> None:
    """Test if Qdrant.from_texts raises an exception if vector name does not match"""
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=first_vector_name,
        )
        del vec_store

        with pytest.raises(QdrantException):
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(dimensionality=5),
                collection_name=collection_name,
                path=str(tmpdir),
                vector_name=second_vector_name,
            )


def test_qdrant_from_texts_raises_error_on_different_distance() -> None:
    """Test if Qdrant.from_texts raises an exception if distance does not match"""
    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(),
            collection_name=collection_name,
            path=str(tmpdir),
            distance_func="Cosine",
        )
        del vec_store

        with pytest.raises(QdrantException) as excinfo:
            Qdrant.from_texts(
                ["foo", "bar"],
                ConsistentFakeEmbeddings(),
                collection_name=collection_name,
                path=str(tmpdir),
                distance_func="Euclid",
            )

        expected_message = (
            "configured for COSINE similarity, but requested EUCLID. Please set "
            "`distance_func` parameter to `COSINE`"
        )
        assert expected_message in str(excinfo.value)


@pytest.mark.parametrize("vector_name", [None, "custom-vector"])
def test_qdrant_from_texts_recreates_collection_on_force_recreate(
    vector_name: Optional[str],
) -> None:
    """Test if Qdrant.from_texts recreates the collection even if config mismatches"""
    from qdrant_client import QdrantClient

    collection_name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as tmpdir:
        vec_store = Qdrant.from_texts(
            ["lorem", "ipsum", "dolor", "sit", "amet"],
            ConsistentFakeEmbeddings(dimensionality=10),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
        )
        del vec_store

        vec_store = Qdrant.from_texts(
            ["foo", "bar"],
            ConsistentFakeEmbeddings(dimensionality=5),
            collection_name=collection_name,
            path=str(tmpdir),
            vector_name=vector_name,
            force_recreate=True,
        )
        del vec_store

        client = QdrantClient(path=str(tmpdir))
        assert 2 == client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
def test_qdrant_from_texts_stores_metadatas(
    batch_size: int, content_payload_key: str, metadata_payload_key: str
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert_documents_equals(
        output, [Document(page_content="foo", metadata={"page": 0})]
    )


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
def test_from_texts_passed_optimizers_config_and_on_disk_payload(location: str) -> None:
    from qdrant_client import models

    collection_name = uuid.uuid4().hex
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    optimizers_config = models.OptimizersConfigDiff(memmap_threshold=1000)
    vec_store = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        optimizers_config=optimizers_config,
        on_disk_payload=True,
        on_disk=True,
        collection_name=collection_name,
        location=location,
    )

    collection_info = vec_store.client.get_collection(collection_name)
    assert collection_info.config.params.vectors.on_disk is True  # type: ignore
    assert collection_info.config.optimizer_config.memmap_threshold == 1000
    assert collection_info.config.params.on_disk_payload is True
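Taken together, the tests above show `from_texts` guarding an existing collection: a mismatched dimensionality, vector name, or distance function raises `QdrantException`, while `force_recreate=True` drops and rebuilds the collection instead. A sketch of that escape hatch (names and paths are illustrative; note that recreation discards all previously stored points):

from langchain_qdrant import Qdrant
from tests.integration_tests.common import ConsistentFakeEmbeddings

Qdrant.from_texts(
    ["foo", "bar"],
    ConsistentFakeEmbeddings(dimensionality=5),  # differs from the old config
    collection_name="my-collection",  # illustrative name
    path="/tmp/qdrant-data",  # illustrative path
    force_recreate=True,  # without this, the mismatch raises QdrantException
)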
@@ -0,0 +1,67 @@
from typing import Optional

import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "test_content"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "test_metadata"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_max_marginal_relevance_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and MMR search."""
    from qdrant_client import models

    filter = models.Filter(
        must=[
            models.FieldCondition(
                key=f"{metadata_payload_key}.page",
                match=models.MatchValue(
                    value=2,
                ),
            ),
        ],
    )

    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
        distance_func="EUCLID",  # Euclid distance used to avoid normalization
    )
    output = docsearch.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0
    )
    assert_documents_equals(
        output,
        [
            Document(page_content="foo", metadata={"page": 0}),
            Document(page_content="baz", metadata={"page": 2}),
        ],
    )

    output = docsearch.max_marginal_relevance_search(
        "foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
    )
    assert_documents_equals(
        output,
        [Document(page_content="baz", metadata={"page": 2})],
    )
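A note on the `lambda_mult` used above: in LangChain's MMR interface it balances relevance against diversity, with 0.0 requesting maximum diversity among the `fetch_k` candidates (which is why "baz" displaces the closer "bar" here) and 1.0 reducing the call to plain similarity ranking. A middle-ground call on the same store would look like:

# Assuming `docsearch` built as in the test above; 0.5 weighs
# relevance and diversity equally.
output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3, lambda_mult=0.5)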
@@ -0,0 +1,284 @@
from typing import Optional

import numpy as np
import pytest
from langchain_core.documents import Document

from langchain_qdrant import Qdrant
from tests.integration_tests.common import (
    ConsistentFakeEmbeddings,
    assert_documents_equals,
)


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert_documents_equals(actual=output, expected=[Document(page_content="foo")])


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )
    embeddings = ConsistentFakeEmbeddings().embed_query("foo")
    output = docsearch.similarity_search_by_vector(embeddings, k=1)
    assert_documents_equals(output, [Document(page_content="foo")])


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_score_by_vector(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )
    embeddings = ConsistentFakeEmbeddings().embed_query("foo")
    output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
    assert len(output) == 1
    document, score = output[0]
    assert_documents_equals(actual=[document], expected=[Document(page_content="foo")])
    assert score >= 0


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters(
    batch_size: int, vector_name: Optional[str]
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        batch_size=batch_size,
        vector_name=vector_name,
    )

    output = docsearch.similarity_search(
        "foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
    )

    assert_documents_equals(
        actual=output,
        expected=[
            Document(
                page_content="bar",
                metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
            )
        ],
    )


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_no_threshold(
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )
    output = docsearch.similarity_search_with_relevance_scores(
        "foo", k=3, score_threshold=None
    )
    assert len(output) == 3
    for i in range(len(output)):
        assert round(output[i][1], 2) >= 0
        assert round(output[i][1], 2) <= 1


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold(
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    score_threshold = 0.98
    kwargs = {"score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter(
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )
    score_threshold = 0.99  # for almost exact match
    # test negative filter condition
    negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}}
    kwargs = {"filter": negative_filter, "score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 0
    # test positive filter condition
    positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}}
    kwargs = {"filter": positive_filter, "score_threshold": score_threshold}
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs)
    assert len(output) == 1
    assert all([score >= score_threshold for _, score in output])


@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_filters_with_qdrant_filters(
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    from qdrant_client.http import models as rest

    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
        for i in range(len(texts))
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
        vector_name=vector_name,
    )

    qdrant_filter = rest.Filter(
        must=[
            rest.FieldCondition(
                key="metadata.page",
                match=rest.MatchValue(value=1),
            ),
            rest.FieldCondition(
                key="metadata.details.page",
                match=rest.MatchValue(value=2),
            ),
            rest.FieldCondition(
                key="metadata.details.pages",
                match=rest.MatchAny(any=[3]),
            ),
        ]
    )
    output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
    assert_documents_equals(
        actual=output,
        expected=[
            Document(
                page_content="bar",
                metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
            )
        ],
    )


@pytest.mark.parametrize("batch_size", [1, 64])
@pytest.mark.parametrize("content_payload_key", [Qdrant.CONTENT_KEY, "foo"])
@pytest.mark.parametrize("metadata_payload_key", [Qdrant.METADATA_KEY, "bar"])
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
def test_qdrant_similarity_search_with_relevance_scores(
    batch_size: int,
    content_payload_key: str,
    metadata_payload_key: str,
    vector_name: Optional[str],
) -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        location=":memory:",
        content_payload_key=content_payload_key,
        metadata_payload_key=metadata_payload_key,
        batch_size=batch_size,
        vector_name=vector_name,
    )
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3)

    assert all(
        (1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
    )
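The relevance-score tests above rely on scores being normalized into [0, 1]; passing `score_threshold` prunes weaker matches before `k` is applied, which is how the threshold tests end up with one result or none. For instance, on a store built as in these tests:

# Assuming `docsearch` from the tests above; only near-exact matches
# (score >= 0.98) survive the cut.
output = docsearch.similarity_search_with_relevance_scores("foo", k=3, score_threshold=0.98)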
0  libs/partners/qdrant/tests/unit_tests/__init__.py  Normal file
7  libs/partners/qdrant/tests/unit_tests/test_imports.py  Normal file
@@ -0,0 +1,7 @@
from langchain_qdrant import __all__

EXPECTED_ALL = ["Qdrant"]


def test_all_imports() -> None:
    assert sorted(EXPECTED_ALL) == sorted(__all__)