mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-31 07:41:40 +00:00 
			
		
		
		
	Signed-off-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com> Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com> Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: ccurme <chester.curme@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com> Co-authored-by: ZhangShenao <15201440436@163.com> Co-authored-by: Friso H. Kingma <fhkingma@gmail.com> Co-authored-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Nuno Campos <nuno@langchain.dev> Co-authored-by: Morgante Pell <morgantep@google.com>
		
			
				
	
	
		
			234 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			234 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| from pathlib import Path
 | |
| from typing import TYPE_CHECKING, Any, Dict, Generator, Tuple
 | |
| 
 | |
| import numpy as np
 | |
| import pytest
 | |
| from pydantic import Field
 | |
| 
 | |
| if TYPE_CHECKING:
 | |
|     from docarray.index import (
 | |
|         ElasticDocIndex,
 | |
|         HnswDocumentIndex,
 | |
|         InMemoryExactNNIndex,
 | |
|         QdrantDocumentIndex,
 | |
|         WeaviateDocumentIndex,
 | |
|     )
 | |
|     from docarray.typing import NdArray
 | |
|     from qdrant_client.http import models as rest
 | |
| 
 | |
| from langchain_community.embeddings import FakeEmbeddings
 | |
| 
 | |
| 
 | |
@pytest.fixture
def init_weaviate() -> (
    Generator[
        Tuple[WeaviateDocumentIndex, Dict[str, Any], FakeEmbeddings],
        None,
        None,
    ]
):
    """Yield a populated Weaviate index, a filter query and the embeddings.

    The index connects to ``http://localhost:8080``; to start a server:

        cd tests/integration_tests/vectorstores/docker-compose
        docker compose -f weaviate.yml up

    The code after the ``yield`` deletes all schemas through the index's
    client.
    """
    from docarray import BaseDoc
    from docarray.index import WeaviateDocumentIndex

    class WeaviateDoc(BaseDoc):
        # The field to search on is the one flagged with `is_embedding=True`.
        title: str
        title_embedding: NdArray[32] = Field(is_embedding=True)  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    embeddings = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> WeaviateDoc:
        """Create the i-th test document with two fake embedding vectors."""
        title_vec = np.array(embeddings.embed_query(f"fake emb {i}"))
        other_vec = np.array(embeddings.embed_query(f"other fake emb {i}"))
        return WeaviateDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Connect to the Weaviate server and create the index.
    weaviate_db = WeaviateDocumentIndex[WeaviateDoc](
        db_config=WeaviateDocumentIndex.DBConfig(host="http://localhost:8080"),
        index_name="docarray_retriever",
    )

    # Store 100 documents whose `year` runs from 0 to 99.
    weaviate_db.index([_make_doc(i) for i in range(100)])

    # Filter on the `year` property.
    filter_query = dict(path=["year"], operator="LessThanEqual", valueInt="90")

    yield weaviate_db, filter_query, embeddings

    # Teardown: drop everything that was created on the server.
    weaviate_db._client.schema.delete_all()
 | |
| 
 | |
| 
 | |
@pytest.fixture
def init_elastic() -> (
    Generator[Tuple[ElasticDocIndex, Dict[str, Any], FakeEmbeddings], None, None]
):
    """Yield a populated Elasticsearch index, a filter query and the embeddings.

    The index connects to ``http://localhost:9200``; to start a server:

        cd tests/integration_tests/vectorstores/docker-compose
        docker-compose -f elasticsearch.yml up

    The code after the ``yield`` deletes the ``docarray_retriever`` index.
    """
    from docarray import BaseDoc
    from docarray.index import ElasticDocIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    # One name shared by index creation and teardown.
    index_name = "docarray_retriever"
    embeddings = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        """Create the i-th test document with two fake embedding vectors."""
        title_vec = np.array(embeddings.embed_query(f"fake emb {i}"))
        other_vec = np.array(embeddings.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the Elasticsearch-backed index.
    elastic_db = ElasticDocIndex[MyDoc](
        hosts="http://localhost:9200", index_name=index_name
    )

    # Store 100 documents whose `year` runs from 0 to 99.
    elastic_db.index([_make_doc(i) for i in range(100)])

    # Range filter on `year` with an upper bound of 90 ("lte").
    year_bounds = {"lte": 90}
    filter_query = {"range": {"year": year_bounds}}

    yield elastic_db, filter_query, embeddings

    # Teardown: remove the index created above.
    elastic_db._client.indices.delete(index=index_name)
 | |
| 
 | |
| 
 | |
@pytest.fixture
def init_qdrant() -> Tuple[QdrantDocumentIndex, rest.Filter, FakeEmbeddings]:
    """Build a Qdrant index holding 100 documents, plus a range filter.

    The index is configured with ``path=":memory:"``.

    Returns:
        A tuple of the populated index, a filter matching documents with
        ``10 <= year < 90``, and the fake embeddings used to create the
        vectors.
    """
    from docarray import BaseDoc
    from docarray.index import QdrantDocumentIndex

    # At module level `rest` is imported only under TYPE_CHECKING, so the name
    # does not exist at runtime. The filter below is built from its classes,
    # so import it here to avoid a NameError.
    from qdrant_client.http import models as rest

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    embeddings = FakeEmbeddings(size=32)

    # initialize QdrantDocumentIndex
    qdrant_config = QdrantDocumentIndex.DBConfig(path=":memory:")
    qdrant_db = QdrantDocumentIndex[MyDoc](qdrant_config)

    # index 100 documents whose `year` runs from 0 to 99
    qdrant_db.index(
        [
            MyDoc(
                title=f"My document {i}",
                title_embedding=np.array(embeddings.embed_query(f"fake emb {i}")),
                other_emb=np.array(embeddings.embed_query(f"other fake emb {i}")),
                year=i,
            )
            for i in range(100)
        ]
    )

    # build a filter query: year in [10, 90)
    filter_query = rest.Filter(
        must=[
            rest.FieldCondition(
                key="year",
                range=rest.Range(
                    gte=10,
                    lt=90,
                ),
            )
        ]
    )

    return qdrant_db, filter_query, embeddings
 | |
| 
 | |
| 
 | |
@pytest.fixture
def init_in_memory() -> Tuple[InMemoryExactNNIndex, Dict[str, Any], FakeEmbeddings]:
    """Build an ``InMemoryExactNNIndex`` holding 100 documents.

    Returns:
        A tuple of the populated index, a filter query on ``year`` and the
        fake embeddings used to create the vectors.
    """
    from docarray import BaseDoc
    from docarray.index import InMemoryExactNNIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    embeddings = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        """Create the i-th test document with two fake embedding vectors."""
        title_vec = np.array(embeddings.embed_query(f"fake emb {i}"))
        other_vec = np.array(embeddings.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the index with default arguments.
    in_memory_db = InMemoryExactNNIndex[MyDoc]()

    # Store 100 documents whose `year` runs from 0 to 99.
    in_memory_db.index([_make_doc(i) for i in range(100)])

    # Filter on `year` using the "$lte" operator with a bound of 90.
    year_condition = {"$lte": 90}
    filter_query = {"year": year_condition}

    return in_memory_db, filter_query, embeddings
 | |
| 
 | |
| 
 | |
@pytest.fixture
def init_hnsw(
    tmp_path: Path,
) -> Tuple[HnswDocumentIndex, Dict[str, Any], FakeEmbeddings]:
    """Build an ``HnswDocumentIndex`` holding 100 documents.

    Args:
        tmp_path: Directory passed to the index as its ``work_dir``.

    Returns:
        A tuple of the populated index, a filter query on ``year`` and the
        fake embeddings used to create the vectors.
    """
    from docarray import BaseDoc
    from docarray.index import HnswDocumentIndex

    class MyDoc(BaseDoc):
        title: str
        title_embedding: NdArray[32]  # type: ignore
        other_emb: NdArray[32]  # type: ignore
        year: int

    embeddings = FakeEmbeddings(size=32)

    def _make_doc(i: int) -> MyDoc:
        """Create the i-th test document with two fake embedding vectors."""
        title_vec = np.array(embeddings.embed_query(f"fake emb {i}"))
        other_vec = np.array(embeddings.embed_query(f"other fake emb {i}"))
        return MyDoc(
            title=f"My document {i}",
            title_embedding=title_vec,
            other_emb=other_vec,
            year=i,
        )

    # Create the HnswDocumentIndex with `tmp_path` as its working directory.
    hnsw_db = HnswDocumentIndex[MyDoc](work_dir=tmp_path)

    # Store 100 documents whose `year` runs from 0 to 99.
    hnsw_db.index([_make_doc(i) for i in range(100)])

    # Filter on `year` using the "$lte" operator with a bound of 90.
    year_condition = {"$lte": 90}
    filter_query = {"year": year_condition}

    return hnsw_db, filter_query, embeddings
 |