mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-01 00:49:25 +00:00
community[minor]: Add indexing via locality sensitive hashing to the Yellowbrick vector store (#20856)
- **Description:** Add LSH-based indexing to the Yellowbrick vector store module - **Twitter handle:** @markcusack --------- Co-authored-by: markcusack <markcusack@markcusacksmac.lan> Co-authored-by: markcusack <markcusack@Mark-Cusack-sMac.local> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
a2fdabdad2
commit
060987d755
@ -98,7 +98,7 @@
|
||||
"import psycopg2\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from langchain.chains import LLMChain, RetrievalQAWithSourcesChain\n",
|
||||
"from langchain_community.docstore.document import Document\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain_community.vectorstores import Yellowbrick\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
@ -209,14 +209,12 @@
|
||||
"\n",
|
||||
"# Define the SQL statement to create a table\n",
|
||||
"create_table_query = f\"\"\"\n",
|
||||
"CREATE TABLE if not exists {embedding_table} (\n",
|
||||
" id uuid,\n",
|
||||
" embedding_id integer,\n",
|
||||
" text character varying(60000),\n",
|
||||
" metadata character varying(1024),\n",
|
||||
" embedding double precision\n",
|
||||
"CREATE TABLE IF NOT EXISTS {embedding_table} (\n",
|
||||
" doc_id uuid NOT NULL,\n",
|
||||
" embedding_id smallint NOT NULL,\n",
|
||||
" embedding double precision NOT NULL\n",
|
||||
")\n",
|
||||
"DISTRIBUTE ON (id);\n",
|
||||
"DISTRIBUTE ON (doc_id);\n",
|
||||
"truncate table {embedding_table};\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
@ -257,6 +255,8 @@
|
||||
" f\"postgres://{urlparse.quote(YBUSER)}:{YBPASSWORD}@{YBHOST}:5432/{YB_DOC_DATABASE}\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(yellowbrick_doc_connection_string)\n",
|
||||
"\n",
|
||||
"# Establish a connection to the Yellowbrick database\n",
|
||||
"conn = psycopg2.connect(yellowbrick_doc_connection_string)\n",
|
||||
"\n",
|
||||
@ -324,7 +324,7 @@
|
||||
"vector_store = Yellowbrick.from_documents(\n",
|
||||
" documents=split_docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" connection_string=yellowbrick_connection_string,\n",
|
||||
" connection_info=yellowbrick_connection_string,\n",
|
||||
" table=embedding_table,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@ -403,6 +403,88 @@
|
||||
"print_result_sources(\"Whats an easy way to add users in bulk to Yellowbrick?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f39fd30",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Part 6: Introducing an Index to Increase Performance\n",
|
||||
"\n",
|
||||
"Yellowbrick also supports indexing using the Locality-Sensitive Hashing (LSH) approach. This is an approximate nearest-neighbor search technique that lets you trade some accuracy for faster similarity search. The index introduces two new tunable parameters:\n",
|
||||
"\n",
|
||||
"- The number of hyperplanes, which is provided as the `num_hyperplanes` entry of the `Yellowbrick.IndexParams` passed to `create_index`. The more documents, the more hyperplanes are needed. LSH is a form of dimensionality reduction. The original embeddings are transformed into lower dimensional vectors where the number of components is the same as the number of hyperplanes.\n",
|
||||
"- The Hamming distance, an integer representing the breadth of the search, provided as the `hamming_distance` entry of the same index parameters. Smaller Hamming distances result in faster retrieval but lower accuracy.\n",
|
||||
"\n",
|
||||
"Here's how you can create an index on the embeddings we loaded into Yellowbrick. We'll also re-run the previous chat session, but this time the retrieval will use the index. Note that for such a small number of documents, you won't see the benefit of indexing in terms of performance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "02ba61c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_template = \"\"\"Use the following pieces of context to answer the users question.\n",
|
||||
"Take note of the sources and include them in the answer in the format: \"SOURCES: source1 source2\", use \"SOURCES\" in capital letters regardless of the number of sources.\n",
|
||||
"If you don't know the answer, just say that \"I don't know\", don't try to make up an answer.\n",
|
||||
"----------------\n",
|
||||
"{summaries}\"\"\"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessagePromptTemplate.from_template(system_template),\n",
|
||||
" HumanMessagePromptTemplate.from_template(\"{question}\"),\n",
|
||||
"]\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(messages)\n",
|
||||
"\n",
|
||||
"vector_store = Yellowbrick(\n",
|
||||
" OpenAIEmbeddings(),\n",
|
||||
" yellowbrick_connection_string,\n",
|
||||
" embedding_table, # Change the table name to reflect your embeddings\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"lsh_params = Yellowbrick.IndexParams(\n",
|
||||
" Yellowbrick.IndexType.LSH, {\"num_hyperplanes\": 8, \"hamming_distance\": 2}\n",
|
||||
")\n",
|
||||
"vector_store.create_index(lsh_params)\n",
|
||||
"\n",
|
||||
"chain_type_kwargs = {\"prompt\": prompt}\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model_name=\"gpt-3.5-turbo\", # Modify model_name if you have access to GPT-4\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=256,\n",
|
||||
")\n",
|
||||
"chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
|
||||
" llm=llm,\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=vector_store.as_retriever(\n",
|
||||
" k=5, search_kwargs={\"index_params\": lsh_params}\n",
|
||||
" ),\n",
|
||||
" return_source_documents=True,\n",
|
||||
" chain_type_kwargs=chain_type_kwargs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def print_result_sources(query):\n",
|
||||
" result = chain(query)\n",
|
||||
" output_text = f\"\"\"### Question: \n",
|
||||
" {query}\n",
|
||||
" ### Answer: \n",
|
||||
" {result['answer']}\n",
|
||||
" ### Sources: \n",
|
||||
" {result['sources']}\n",
|
||||
" ### All relevant sources:\n",
|
||||
" {', '.join(list(set([doc.metadata['source'] for doc in result['source_documents']])))}\n",
|
||||
" \"\"\"\n",
|
||||
" display(Markdown(output_text))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Use the chain to query\n",
|
||||
"\n",
|
||||
"print_result_sources(\"How many databases can be in a Yellowbrick Instance?\")\n",
|
||||
"\n",
|
||||
"print_result_sources(\"Whats an easy way to add users in bulk to Yellowbrick?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "697c8a38",
|
||||
@ -418,9 +500,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain_venv",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "langchain_venv"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
@ -60,7 +60,7 @@
|
||||
" * document addition by id (`add_documents` method with `ids` argument)\n",
|
||||
" * delete by id (`delete` method with `ids` argument)\n",
|
||||
"\n",
|
||||
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AzureCosmosDBVectorSearch`, `AzureSearch`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `LanceDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `UpstashVectorStore`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`.\n",
|
||||
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AzureCosmosDBVectorSearch`, `AzureSearch`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `LanceDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `UpstashVectorStore`, `Vald`, `VDMS`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`, `TencentVectorDB`, `OpenSearchVectorSearch`, `Yellowbrick`.\n",
|
||||
" \n",
|
||||
"## Caution\n",
|
||||
"\n",
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
import pytest
|
||||
@ -5,60 +6,256 @@ import pytest
|
||||
from langchain_community.docstore.document import Document
|
||||
from langchain_community.vectorstores import Yellowbrick
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
FakeEmbeddings,
|
||||
ConsistentFakeEmbeddings,
|
||||
fake_texts,
|
||||
)
|
||||
|
||||
YELLOWBRICK_URL = "postgres://username:password@host:port/database"
|
||||
YELLOWBRICK_TABLE = "test_table"
|
||||
YELLOWBRICK_CONTENT = "test_table_content"
|
||||
YELLOWBRICK_SCHEMA = "test_schema"
|
||||
|
||||
|
||||
def _yellowbrick_vector_from_texts(
|
||||
metadatas: Optional[List[dict]] = None, drop: bool = True
|
||||
) -> Yellowbrick:
|
||||
return Yellowbrick.from_texts(
|
||||
db = Yellowbrick.from_texts(
|
||||
fake_texts,
|
||||
FakeEmbeddings(),
|
||||
ConsistentFakeEmbeddings(),
|
||||
metadatas,
|
||||
YELLOWBRICK_URL,
|
||||
YELLOWBRICK_TABLE,
|
||||
table=YELLOWBRICK_TABLE,
|
||||
schema=YELLOWBRICK_SCHEMA,
|
||||
drop=drop,
|
||||
)
|
||||
db.logger.setLevel(logging.DEBUG)
|
||||
return db
|
||||
|
||||
|
||||
def _yellowbrick_vector_from_texts_no_schema(
|
||||
metadatas: Optional[List[dict]] = None, drop: bool = True
|
||||
) -> Yellowbrick:
|
||||
db = Yellowbrick.from_texts(
|
||||
fake_texts,
|
||||
ConsistentFakeEmbeddings(),
|
||||
metadatas,
|
||||
YELLOWBRICK_URL,
|
||||
table=YELLOWBRICK_TABLE,
|
||||
drop=drop,
|
||||
)
|
||||
db.logger.setLevel(logging.DEBUG)
|
||||
return db
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearch = _yellowbrick_vector_from_texts()
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
docsearch.drop(YELLOWBRICK_TABLE)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_add_text() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
docsearch.add_texts(texts)
|
||||
output = docsearch.similarity_search("oof", k=1)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_delete() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
added_docs = docsearch.add_texts(texts)
|
||||
output = docsearch.similarity_search("oof", k=1)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.delete(added_docs)
|
||||
output = docsearch.similarity_search("oof", k=1)
|
||||
assert output != [Document(page_content="oof", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_delete_all() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
docsearch.add_texts(texts)
|
||||
output = docsearch.similarity_search("oof", k=1)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.delete(delete_all=True)
|
||||
output = docsearch.similarity_search("oof", k=1)
|
||||
assert output != [Document(page_content="oof", metadata={})]
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output != [Document(page_content="foo", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_lsh_search() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
index_params = Yellowbrick.IndexParams(
|
||||
Yellowbrick.IndexType.LSH, {"num_hyperplanes": 10, "hamming_distance": 0}
|
||||
)
|
||||
docsearch.drop_index(index_params)
|
||||
docsearch.create_index(index_params)
|
||||
output = docsearch.similarity_search("foo", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
docsearch.drop_index(index_params=index_params)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_lsh_search_update() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
index_params = Yellowbrick.IndexParams(
|
||||
Yellowbrick.IndexType.LSH, {"num_hyperplanes": 10, "hamming_distance": 0}
|
||||
)
|
||||
docsearch.drop_index(index_params)
|
||||
docsearch.create_index(index_params)
|
||||
output = docsearch.similarity_search("foo", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
docsearch.add_texts(texts, index_params=index_params)
|
||||
output = docsearch.similarity_search("oof", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
docsearch.drop_index(index_params=index_params)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_lsh_delete() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
index_params = Yellowbrick.IndexParams(
|
||||
Yellowbrick.IndexType.LSH, {"num_hyperplanes": 10, "hamming_distance": 0}
|
||||
)
|
||||
docsearch.drop_index(index_params)
|
||||
docsearch.create_index(index_params)
|
||||
output = docsearch.similarity_search("foo", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
added_docs = docsearch.add_texts(texts, index_params=index_params)
|
||||
output = docsearch.similarity_search("oof", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.delete(added_docs)
|
||||
output = docsearch.similarity_search("oof", k=1, index_params=index_params)
|
||||
assert output != [Document(page_content="oof", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
docsearch.drop_index(index_params=index_params)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_lsh_delete_all() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
index_params = Yellowbrick.IndexParams(
|
||||
Yellowbrick.IndexType.LSH, {"num_hyperplanes": 10, "hamming_distance": 0}
|
||||
)
|
||||
docsearch.drop_index(index_params)
|
||||
docsearch.create_index(index_params)
|
||||
output = docsearch.similarity_search("foo", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="foo", metadata={})]
|
||||
texts = ["oof"]
|
||||
docsearch.add_texts(texts, index_params=index_params)
|
||||
output = docsearch.similarity_search("oof", k=1, index_params=index_params)
|
||||
assert output == [Document(page_content="oof", metadata={})]
|
||||
docsearch.delete(delete_all=True)
|
||||
output = docsearch.similarity_search("oof", k=1, index_params=index_params)
|
||||
assert output != [Document(page_content="oof", metadata={})]
|
||||
output = docsearch.similarity_search("foo", k=1, index_params=index_params)
|
||||
assert output != [Document(page_content="foo", metadata={})]
|
||||
docsearch.drop(table=YELLOWBRICK_TABLE, schema=docsearch._schema)
|
||||
docsearch.drop(table=YELLOWBRICK_CONTENT, schema=docsearch._schema)
|
||||
docsearch.drop_index(index_params=index_params)
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_with_score() -> None:
|
||||
"""Test end to end construction and search with scores and IDs."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _yellowbrick_vector_from_texts(metadatas=metadatas)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
distances = [o[1] for o in output]
|
||||
docsearch.drop(YELLOWBRICK_TABLE)
|
||||
assert docs == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="bar", metadata={"page": 1}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
assert distances[0] > distances[1] > distances[2]
|
||||
for docsearch in docsearches:
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _yellowbrick_vector_from_texts(metadatas=metadatas)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
distances = [o[1] for o in output]
|
||||
assert docs == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="bar", metadata={"page": 1}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert distances[0] > distances[1] > distances[2]
|
||||
|
||||
|
||||
@pytest.mark.requires("yb-vss")
|
||||
def test_yellowbrick_add_extra() -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _yellowbrick_vector_from_texts(metadatas=metadatas)
|
||||
docsearch.add_texts(texts, metadatas)
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
docsearch.drop(YELLOWBRICK_TABLE)
|
||||
assert len(output) == 6
|
||||
docsearches = [
|
||||
_yellowbrick_vector_from_texts(),
|
||||
_yellowbrick_vector_from_texts_no_schema(),
|
||||
]
|
||||
for docsearch in docsearches:
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _yellowbrick_vector_from_texts(metadatas=metadatas)
|
||||
docsearch.add_texts(texts, metadatas)
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
@ -95,6 +95,7 @@ def test_compatible_vectorstore_documentation() -> None:
|
||||
"VespaStore",
|
||||
"VLite",
|
||||
"Weaviate",
|
||||
"Yellowbrick",
|
||||
"ZepVectorStore",
|
||||
"Zilliz",
|
||||
"Lantern",
|
||||
|
Loading…
Reference in New Issue
Block a user