This commit is contained in:
Harrison Chase
2023-04-15 18:25:17 -07:00
7 changed files with 387 additions and 188 deletions

View File

@@ -46,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 2,
"id": "c10e7696",
"metadata": {},
"outputs": [],
@@ -62,17 +62,17 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 3,
"id": "86dbadb9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['e1990f14-c66c-4de1-961e-b35d251dbc24']"
"['73f8f585-9536-4240-aef7-cea205b336bd']"
]
},
"execution_count": 28,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -85,29 +85,24 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 4,
"id": "a580be32",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.999943851187518\n",
"1.7783825087097196\n",
"0.9997499080672996\n",
"1.997134532390176\n"
"ename": "ValueError",
"evalue": "normalize_score_fn must be provided to FAISS constructor to normalize scores",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mretriever\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_relevant_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhello world\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/workplace/langchain/langchain/retrievers/time_weighted_retriever.py:94\u001b[0m, in \u001b[0;36mTimeWeightedVectorStoreRetriever.get_relevant_documents\u001b[0;34m(self, query)\u001b[0m\n\u001b[1;32m 89\u001b[0m docs_and_scores \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 90\u001b[0m doc\u001b[38;5;241m.\u001b[39mmetadata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbuffer_idx\u001b[39m\u001b[38;5;124m\"\u001b[39m]: (doc, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_salience)\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmemory_stream[\u001b[38;5;241m-\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mk :]\n\u001b[1;32m 92\u001b[0m }\n\u001b[1;32m 93\u001b[0m \u001b[38;5;66;03m# If a doc is considered salient, update the salience score\u001b[39;00m\n\u001b[0;32m---> 94\u001b[0m docs_and_scores\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_salient_docs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 95\u001b[0m rescored_docs \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 96\u001b[0m (doc, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_combined_score(doc, salience, current_time))\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m doc, salience \u001b[38;5;129;01min\u001b[39;00m docs_and_scores\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 98\u001b[0m ]\n\u001b[1;32m 99\u001b[0m rescored_docs\u001b[38;5;241m.\u001b[39msort(key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;241m1\u001b[39m], reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"File \u001b[0;32m~/workplace/langchain/langchain/retrievers/time_weighted_retriever.py:75\u001b[0m, in \u001b[0;36mTimeWeightedVectorStoreRetriever.get_salient_docs\u001b[0;34m(self, query)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return documents that are salient to the query.\"\"\"\u001b[39;00m\n\u001b[1;32m 73\u001b[0m docs_and_scores: List[Tuple[Document, \u001b[38;5;28mfloat\u001b[39m]]\n\u001b[1;32m 74\u001b[0m docs_and_scores \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m---> 75\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvectorstore\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msimilarity_search_with_normalized_similarities\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch_kwargs\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 78\u001b[0m )\n\u001b[1;32m 79\u001b[0m results \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m fetched_doc, cosine_distance \u001b[38;5;129;01min\u001b[39;00m docs_and_scores:\n",
"File \u001b[0;32m~/workplace/langchain/langchain/vectorstores/base.py:94\u001b[0m, in \u001b[0;36mVectorStore.similarity_search_with_normalized_similarities\u001b[0;34m(self, query, k, **kwargs)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msimilarity_search_with_normalized_similarities\u001b[39m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 86\u001b[0m query: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 87\u001b[0m k: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m4\u001b[39m,\n\u001b[1;32m 88\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 89\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Tuple[Document, \u001b[38;5;28mfloat\u001b[39m]]:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return docs and similarity scores, normalized on a scale from 0 to 1.\u001b[39;00m\n\u001b[1;32m 91\u001b[0m \n\u001b[1;32m 92\u001b[0m \u001b[38;5;124;03m 0 is dissimilar, 1 is most similar.\u001b[39;00m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 94\u001b[0m docs_and_similarities \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_similarity_search_with_normalized_similarities\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(\n\u001b[1;32m 98\u001b[0m similarity \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0.0\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m similarity \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1.0\u001b[39m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _, similarity \u001b[38;5;129;01min\u001b[39;00m docs_and_similarities\n\u001b[1;32m 100\u001b[0m ):\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 102\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNormalized similarity scores must be between\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m 0 and 1, got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdocs_and_similarities\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 104\u001b[0m )\n",
"File \u001b[0;32m~/workplace/langchain/langchain/vectorstores/faiss.py:435\u001b[0m, in \u001b[0;36mFAISS._similarity_search_with_normalized_similarities\u001b[0;34m(self, query, k, **kwargs)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return docs and their similarity scores on a scale from 0 to 1.\"\"\"\u001b[39;00m\n\u001b[1;32m 434\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnormalize_score_fn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 435\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 436\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnormalize_score_fn must be provided to\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m FAISS constructor to normalize scores\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 438\u001b[0m )\n\u001b[1;32m 439\u001b[0m docs_and_scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimilarity_search_with_score(query, k\u001b[38;5;241m=\u001b[39mk)\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [(doc, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnormalize_score_fn(score)) \u001b[38;5;28;01mfor\u001b[39;00m doc, score \u001b[38;5;129;01min\u001b[39;00m docs_and_scores]\n",
"\u001b[0;31mValueError\u001b[0m: normalize_score_fn must be provided to FAISS constructor to normalize scores"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='hello world', metadata={'last_accessed_at': datetime.datetime(2023, 4, 13, 23, 35, 40, 308626), 'created_at': datetime.datetime(2023, 4, 13, 23, 35, 38, 583497), 'buffer_idx': 0})]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [