mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-05 15:18:32 +00:00
Signed-off-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com> Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com> Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: ccurme <chester.curme@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com> Co-authored-by: ZhangShenao <15201440436@163.com> Co-authored-by: Friso H. Kingma <fhkingma@gmail.com> Co-authored-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Nuno Campos <nuno@langchain.dev> Co-authored-by: Morgante Pell <morgantep@google.com>
234 lines
6.4 KiB
Python
234 lines
6.4 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any, Dict, Generator, Tuple
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from pydantic import Field
|
|
|
|
if TYPE_CHECKING:
|
|
from docarray.index import (
|
|
ElasticDocIndex,
|
|
HnswDocumentIndex,
|
|
InMemoryExactNNIndex,
|
|
QdrantDocumentIndex,
|
|
WeaviateDocumentIndex,
|
|
)
|
|
from docarray.typing import NdArray
|
|
from qdrant_client.http import models as rest
|
|
|
|
from langchain_community.embeddings import FakeEmbeddings
|
|
|
|
|
|
@pytest.fixture
def init_weaviate() -> (
    Generator[
        Tuple[WeaviateDocumentIndex, Dict[str, Any], FakeEmbeddings],
        None,
        None,
    ]
):
    """Provide a Weaviate document index pre-filled with 100 fake documents.

    The index connects to ``http://localhost:8080``. Setup commands::

        cd tests/integration_tests/vectorstores/docker-compose
        docker compose -f weaviate.yml up

    Yields:
        A ``(index, filter_query, embeddings)`` tuple: the populated
        ``WeaviateDocumentIndex``, a filter dict on the ``year`` field, and
        the ``FakeEmbeddings`` instance used to embed the documents.

    After the consuming test finishes, ``schema.delete_all()`` is called on
    the index's underlying client.
    """
    from docarray import BaseDoc
    from docarray.index import WeaviateDocumentIndex

    class WeaviateDoc(BaseDoc):
        # The field to search on is marked with `is_embedding=True` when the
        # Weaviate index is initialized.
        title: str
        title_embedding: NdArray[32] = Field(is_embedding=True)  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    fake_embedder = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> WeaviateDoc:
        # Embed the title vector first, then the secondary vector.
        title_vec = np.array(fake_embedder.embed_query(f"fake emb {i}"))
        other_vec = np.array(fake_embedder.embed_query(f"other fake emb {i}"))
        return WeaviateDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Connect to the Weaviate instance and create the typed index.
    config = WeaviateDocumentIndex.DBConfig(host="http://localhost:8080")
    index = WeaviateDocumentIndex[WeaviateDoc](
        db_config=config, index_name="docarray_retriever"
    )

    # Populate the index.
    documents = [_make_doc(i) for i in range(100)]
    index.index(documents)

    # Filter on the `year` field.
    # NOTE(review): `valueInt` is given as the string "90" rather than an
    # int -- confirm this is what Weaviate expects.
    year_filter = {"path": ["year"], "operator": "LessThanEqual", "valueInt": "90"}

    yield index, year_filter, fake_embedder

    # Teardown: runs once the consuming test is done.
    index._client.schema.delete_all()
|
|
|
|
|
|
@pytest.fixture
def init_elastic() -> (
    Generator[Tuple[ElasticDocIndex, Dict[str, Any], FakeEmbeddings], None, None]
):
    """Provide an Elasticsearch document index pre-filled with 100 fake documents.

    The index connects to ``http://localhost:9200``. Setup commands::

        cd tests/integration_tests/vectorstores/docker-compose
        docker-compose -f elasticsearch.yml up

    Yields:
        A ``(index, filter_query, embeddings)`` tuple: the populated
        ``ElasticDocIndex``, a range filter on ``year`` (``lte`` 90), and the
        ``FakeEmbeddings`` instance used to embed the documents.

    After the consuming test finishes, the ``docarray_retriever`` index is
    deleted through the index's underlying client.
    """
    from docarray import BaseDoc
    from docarray.index import ElasticDocIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    fake_embedder = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        # Embed the title vector first, then the secondary vector.
        title_vec = np.array(fake_embedder.embed_query(f"fake emb {i}"))
        other_vec = np.array(fake_embedder.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the typed Elasticsearch index.
    index = ElasticDocIndex[MyDoc](
        hosts="http://localhost:9200", index_name="docarray_retriever"
    )

    # Populate the index.
    documents = [_make_doc(i) for i in range(100)]
    index.index(documents)

    # Range filter on the `year` field.
    year_filter = {"range": {"year": {"lte": 90}}}

    yield index, year_filter, fake_embedder

    # Teardown: runs once the consuming test is done.
    index._client.indices.delete(index="docarray_retriever")
|
|
|
|
|
|
@pytest.fixture
def init_qdrant() -> Tuple[QdrantDocumentIndex, rest.Filter, FakeEmbeddings]:
    """Provide a Qdrant document index pre-filled with 100 fake documents.

    The index is configured with ``path=":memory:"``.

    Returns:
        A ``(index, filter_query, embeddings)`` tuple: the populated
        ``QdrantDocumentIndex``, a ``rest.Filter`` requiring
        ``10 <= year < 90``, and the ``FakeEmbeddings`` instance used to
        embed the documents.
    """
    from docarray import BaseDoc
    from docarray.index import QdrantDocumentIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    fake_embedder = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        # Embed the title vector first, then the secondary vector.
        title_vec = np.array(fake_embedder.embed_query(f"fake emb {i}"))
        other_vec = np.array(fake_embedder.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the typed Qdrant index.
    config = QdrantDocumentIndex.DBConfig(path=":memory:")
    index = QdrantDocumentIndex[MyDoc](config)

    # Populate the index.
    documents = [_make_doc(i) for i in range(100)]
    index.index(documents)

    # Build the filter step by step: range -> field condition -> filter.
    year_range = rest.Range(gte=10, lt=90)
    year_condition = rest.FieldCondition(key="year", range=year_range)
    year_filter = rest.Filter(must=[year_condition])

    return index, year_filter, fake_embedder
|
|
|
|
|
|
@pytest.fixture
def init_in_memory() -> Tuple[InMemoryExactNNIndex, Dict[str, Any], FakeEmbeddings]:
    """Provide an ``InMemoryExactNNIndex`` pre-filled with 100 fake documents.

    Returns:
        A ``(index, filter_query, embeddings)`` tuple: the populated index,
        a filter dict selecting ``year <= 90`` (``$lte``), and the
        ``FakeEmbeddings`` instance used to embed the documents.
    """
    from docarray import BaseDoc
    from docarray.index import InMemoryExactNNIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    fake_embedder = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        # Embed the title vector first, then the secondary vector.
        title_vec = np.array(fake_embedder.embed_query(f"fake emb {i}"))
        other_vec = np.array(fake_embedder.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the typed in-memory index.
    index = InMemoryExactNNIndex[MyDoc]()

    # Populate the index.
    documents = [_make_doc(i) for i in range(100)]
    index.index(documents)

    # Filter on the `year` field.
    year_filter = {"year": {"$lte": 90}}

    return index, year_filter, fake_embedder
|
|
|
|
|
|
@pytest.fixture
def init_hnsw(
    tmp_path: Path,
) -> Tuple[HnswDocumentIndex, Dict[str, Any], FakeEmbeddings]:
    """Provide an ``HnswDocumentIndex`` pre-filled with 100 fake documents.

    Args:
        tmp_path: Directory passed to the index as its ``work_dir``.

    Returns:
        A ``(index, filter_query, embeddings)`` tuple: the populated index,
        a filter dict selecting ``year <= 90`` (``$lte``), and the
        ``FakeEmbeddings`` instance used to embed the documents.
    """
    from docarray import BaseDoc
    from docarray.index import HnswDocumentIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    fake_embedder = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        # Embed the title vector first, then the secondary vector.
        title_vec = np.array(fake_embedder.embed_query(f"fake emb {i}"))
        other_vec = np.array(fake_embedder.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the typed HnswDocumentIndex, working out of tmp_path.
    index = HnswDocumentIndex[MyDoc](work_dir=tmp_path)

    # Populate the index.
    documents = [_make_doc(i) for i in range(100)]
    index.index(documents)

    # Filter on the `year` field.
    year_filter = {"year": {"$lte": 90}}

    return index, year_filter, fake_embedder
|