This commit is contained in:
Dev 2049
2023-04-19 12:23:12 -07:00
parent 61f1177721
commit b86aedd07b
11 changed files with 31 additions and 37 deletions

View File

@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "28e8dc12",
"metadata": {},
"outputs": [],
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "9fbcc58f",
"metadata": {},
"outputs": [
@@ -124,22 +124,22 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 4,
"id": "9a658023",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.retrievers import ContextualCompressionRetriever\n",
"from langchain.retrievers.document_filters import LLMChainDocumentCompressor\n",
"from langchain.retrievers.document_filters import LLMChainExtractionDocumentFilter\n",
"\n",
"_filter = LLMChainDocumentCompressor.from_llm(OpenAI(temperature=0))\n",
"_filter = LLMChainExtractionDocumentFilter.from_llm(OpenAI(temperature=0))\n",
"compression_retriever = ContextualCompressionRetriever(base_filter=_filter, base_retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "398622c5",
"metadata": {},
"outputs": [
@@ -175,7 +175,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 6,
"id": "2a150a63",
"metadata": {},
"outputs": [],
@@ -207,7 +207,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 7,
"id": "3ceab64a",
"metadata": {},
"outputs": [
@@ -245,17 +245,9 @@
"id": "87dcc583",
"metadata": {},
"source": [
"\n",
"# Results\n",
"Here we create a sequence where we first split the initial documents into smaller documents, then we drop redundant documents, and finally we drop any documents not relevant to the query. The results aren't quite as good as the LLM-powered filter above, but we were able to do all this filtering much more quickly and cheaply by only using Embedding models."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdb63b80",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {