diff --git a/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb b/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb index b54746a27bd..98a84410930 100644 --- a/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb +++ b/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb @@ -46,18 +46,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "bcbe04d9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using embedded DuckDB without persistence: data will be transient\n" - ] - } - ], + "outputs": [], "source": [ "docs = [\n", " Document(page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\", metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"}),\n", @@ -65,7 +57,7 @@ " Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n", " Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n", " Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"year\": 1995, \"genre\": \"animated\"}),\n", - " Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": \"science fiction\", \"rating\": 9.9})\n", + " Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"director\": \"Andrei Tarkovsky\", \"genre\": \"science fiction\", \"rating\": 9.9})\n", "]\n", "vectorstore = Chroma.from_documents(\n", " docs, embeddings\n", @@ -302,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb b/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb index 7668bf34edb..43e9736a18f 100644 --- a/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb +++ b/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb @@ -32,19 +32,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "3eb9c9a4", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/pinecone/index.py:4: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", - " from tqdm.autonotebook import tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "\n", @@ -83,7 +74,7 @@ " Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n", " Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n", " Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"year\": 1995, \"genre\": \"animated\"}),\n", - " Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": [\"science fiction\", \"thriller\"], \"rating\": 9.9})\n", + " Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"director\": \"Andrei Tarkovsky\", \"genre\": [\"science fiction\", \"thriller\"], \"rating\": 9.9})\n", "]\n", "vectorstore = Pinecone.from_documents(\n", " docs, embeddings, index_name=\"langchain-self-retriever-demo\"\n", @@ -320,7 +311,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/tests/integration_tests/retrievers/test_self_query.py b/tests/integration_tests/retrievers/test_self_query.py new file mode 100644 index 00000000000..474004b3580 --- /dev/null +++ b/tests/integration_tests/retrievers/test_self_query.py @@ -0,0 +1,84 @@ +import pytest + +from langchain.chains.query_constructor.base import AttributeInfo +from langchain.embeddings import FakeEmbeddings +from langchain.llms import OpenAI +from langchain.retrievers.self_query.base import SelfQueryRetriever +from langchain.schema import Document +from langchain.vectorstores import Chroma + + +@pytest.fixture +def retriever() -> SelfQueryRetriever: + docs = [ + Document( + page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose", + metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"}, + ), + Document( + page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", + metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2}, + ), + Document( + page_content=( + "A psychologist / detective gets lost in a series of dreams within" + " dreams within dreams and Inception reused the idea" + ), + metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6}, + ), + Document( + page_content=( + "A bunch of normal-sized women are supremely wholesome and some men " + "pine after them" + ), + metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3}, + ), + Document( + page_content="Toys come alive and have a blast doing so", + metadata={"year": 1995, "genre": "animated"}, + ), + Document( + page_content="Three men walk into the Zone, three men walk out of the Zone", + metadata={ + "year": 1979, + "director": "Andrei Tarkovsky", + "genre": "science fiction", + "rating": 9.9, + }, + ), + ] + vectorstore = Chroma.from_documents(docs, FakeEmbeddings(size=10)) + metadata_field_info = [ + AttributeInfo( + name="genre", + description="The genre of the movie", + type="string or list[string]", + ), + AttributeInfo( + name="year", + description="The year the movie was released", + type="integer", + ), + AttributeInfo( + name="director", + description="The name of the movie director", + type="string", + ), + AttributeInfo( + name="rating", description="A 1-10 rating for the movie", type="float" + ), + ] + retriever = SelfQueryRetriever.from_llm( + OpenAI(temperature=0), + vectorstore, + "Brief summary of a movie", + metadata_field_info, + ) + return retriever + + +def test_get_relevant_documents(retriever: SelfQueryRetriever) -> None: + docs = retriever.get_relevant_documents( + "What are some movies about dinosaurs that came out before 2000" + ) + assert len(docs) > 0 diff --git a/tests/persist_dir/chroma-collections.parquet b/tests/persist_dir/chroma-collections.parquet new file mode 100644 index 00000000000..08a73a894fb Binary files /dev/null and b/tests/persist_dir/chroma-collections.parquet differ diff --git a/tests/persist_dir/chroma-embeddings.parquet b/tests/persist_dir/chroma-embeddings.parquet new file mode 100644 index 00000000000..bc531335827 Binary files /dev/null and b/tests/persist_dir/chroma-embeddings.parquet differ