diff --git a/docs/extras/modules/data_connection/retrievers/integrations/weaviate-hybrid.ipynb b/docs/extras/modules/data_connection/retrievers/integrations/weaviate-hybrid.ipynb index f790dd7bda4..f256d49d068 100644 --- a/docs/extras/modules/data_connection/retrievers/integrations/weaviate-hybrid.ipynb +++ b/docs/extras/modules/data_connection/retrievers/integrations/weaviate-hybrid.ipynb @@ -48,9 +48,9 @@ "import os\n", "\n", "WEAVIATE_URL = os.getenv(\"WEAVIATE_URL\")\n", + "auth_client_secret = (weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),)\n", "client = weaviate.Client(\n", " url=WEAVIATE_URL,\n", - " auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),\n", " additional_headers={\n", " \"X-Openai-Api-Key\": os.getenv(\"OPENAI_API_KEY\"),\n", " },\n", @@ -68,10 +68,7 @@ { "name": "stderr", "output_type": "stream", - "text": [ - "/workspaces/langchain/langchain/vectorstores/analyticdb.py:20: MovedIn20Warning: The ``declarative_base()`` function is now available as sqlalchemy.orm.declarative_base(). 
(deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " Base = declarative_base() # type: Any\n" - ] + "text": [] } ], "source": [ @@ -87,7 +84,11 @@ "outputs": [], "source": [ "retriever = WeaviateHybridSearchRetriever(\n", - " client, index_name=\"LangChain\", text_key=\"text\"\n", + " client=client,\n", + " index_name=\"LangChain\",\n", + " text_key=\"text\",\n", + " attributes=[],\n", + " create_schema_if_missing=True,\n", ")" ] }, @@ -152,11 +153,11 @@ { "data": { "text/plain": [ - "['eda16d7d-437d-4613-84ae-c2e38705ec7a',\n", - " '04b501bf-192b-4e72-be77-2fbbe7e67ebf',\n", - " '18a1acdb-23b7-4482-ab04-a6c2ed51de77',\n", - " '88e82cc3-c020-4b5a-b3c6-ca7cf3fc6a04',\n", - " 'f6abd9d5-32ed-46c4-bd08-f8d0f7c9fc95']" + "['3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be',\n", + " 'eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907',\n", + " '7ebbdae7-1061-445f-a046-1989f2343d8f',\n", + " 'c2ab315b-3cab-467f-b23a-b26ed186318d',\n", + " 'b83765f2-e5d2-471f-8c02-c3350ade4c4f']" ] }, "execution_count": 6, @@ -238,6 +239,41 @@ " },\n", ")" ] + }, + { + "cell_type": "markdown", + "id": "5ae2899e", + "metadata": {}, + "source": [ + "Do a hybrid search with scores:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4fffd0af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score', 'score': '0.016393442'}}),\n", + " Document(page_content=\"In his follow-up to 'Symbiosis', Prof. 
Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.0078125 to the score\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.008064516129032258 to the score', 'score': '0.015877016'}}),\n", + " Document(page_content='In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.008064516129032258 to the score\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.0078125 to the score', 'score': '0.015877016'}}),\n", + " Document(page_content='A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.', metadata={'_additional': {'explainScore': '(vector) [-0.0071824766 -0.0006682752 0.001723625 -0.01897258 -0.0045127636 0.0024410256 -0.020503938 0.013768672 0.009520169 -0.037972264]... 
\\n(hybrid) Document 3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be contributed 0.007936507936507936 to the score', 'score': '0.007936508'}})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\n", + " \"AI integration in society\",\n", + " score=True,\n", + ")" + ] } ], "metadata": { @@ -256,7 +292,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/langchain/retrievers/weaviate_hybrid_search.py b/langchain/retrievers/weaviate_hybrid_search.py index d6d00d5326c..994d02e212c 100644 --- a/langchain/retrievers/weaviate_hybrid_search.py +++ b/langchain/retrievers/weaviate_hybrid_search.py @@ -98,12 +98,16 @@ class WeaviateHybridSearchRetriever(BaseRetriever): *, run_manager: CallbackManagerForRetrieverRun, where_filter: Optional[Dict[str, object]] = None, + score: bool = False, ) -> List[Document]: """Look up similar documents in Weaviate.""" query_obj = self.client.query.get(self.index_name, self.attributes) if where_filter: query_obj = query_obj.with_where(where_filter) + if score: + query_obj = query_obj.with_additional(["score", "explainScore"]) + result = query_obj.with_hybrid(query, alpha=self.alpha).with_limit(self.k).do() if "errors" in result: raise ValueError(f"Error during query: {result['errors']}") diff --git a/tests/integration_tests/retrievers/test_weaviate_hybrid_search.py b/tests/integration_tests/retrievers/test_weaviate_hybrid_search.py index 581bfe9e231..267820cf2ac 100644 --- a/tests/integration_tests/retrievers/test_weaviate_hybrid_search.py +++ b/tests/integration_tests/retrievers/test_weaviate_hybrid_search.py @@ -61,6 +61,29 @@ class TestWeaviateHybridSearchRetriever: Document(page_content="bar", metadata={"page": 1}), ] + @pytest.mark.vcr(ignore_localhost=True) + def test_get_relevant_documents_with_score(self, weaviate_url: str) -> None: + """Test 
end to end construction and hybrid search with scores.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + + client = Client(weaviate_url) + + retriever = WeaviateHybridSearchRetriever( + client=client, + index_name=f"LangChain_{uuid4().hex}", + text_key="text", + attributes=["page"], + ) + for i, text in enumerate(texts): + retriever.add_documents( + [Document(page_content=text, metadata=metadatas[i])] + ) + + output = retriever.get_relevant_documents("foo", score=True) + for doc in output: + assert "_additional" in doc.metadata + @pytest.mark.vcr(ignore_localhost=True) def test_get_relevant_documents_with_filter(self, weaviate_url: str) -> None: """Test end to end construction and MRR search."""