simplify csv args (#4182)

This commit is contained in:
Harrison Chase 2023-05-05 09:22:08 -07:00 committed by GitHub
parent 3095546851
commit 26534457f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 27 deletions

View File

@ -14,7 +14,7 @@
"id": "68e75fb9", "id": "68e75fb9",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Creating a Pinecone index\n", "## Creating a Chroma vectorstore\n",
"First we'll want to create a Chroma VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n", "First we'll want to create a Chroma VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"\n", "\n",
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)" "NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)"
@ -46,7 +46,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 3,
"id": "bcbe04d9", "id": "bcbe04d9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -77,13 +77,13 @@
"id": "5ecaab6d", "id": "5ecaab6d",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Creating our self-querying retriever\n", "## Creating our self-querying retriever\n",
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents." "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"id": "86e34dbf", "id": "86e34dbf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -124,13 +124,13 @@
"id": "ea9df8d4", "id": "ea9df8d4",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Testing it out\n", "## Testing it out\n",
"And now we can try actually using our retriever!" "And now we can try actually using our retriever!"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 5,
"id": "38a126e9", "id": "38a126e9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -150,7 +150,7 @@
" Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2})]" " Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2})]"
] ]
}, },
"execution_count": 6, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -162,7 +162,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 6,
"id": "fc3f1e6e", "id": "fc3f1e6e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -180,7 +180,7 @@
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]" " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]"
] ]
}, },
"execution_count": 7, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -192,7 +192,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"id": "b19d4da0", "id": "b19d4da0",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -209,7 +209,7 @@
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]" "[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]"
] ]
}, },
"execution_count": 8, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -221,7 +221,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 8,
"id": "f900e40e", "id": "f900e40e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -238,7 +238,7 @@
"[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]" "[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]"
] ]
}, },
"execution_count": 9, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -250,7 +250,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 9,
"id": "12a51522", "id": "12a51522",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -267,7 +267,7 @@
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]" "[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
] ]
}, },
"execution_count": 10, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -276,6 +276,14 @@
"# This example specifies a query and composite filter\n", "# This example specifies a query and composite filter\n",
"retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")" "retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60110338",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -294,7 +302,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.3" "version": "3.9.1"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -363,7 +363,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.3" "version": "3.9.1"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -95,7 +95,7 @@
"id": "5ecaab6d", "id": "5ecaab6d",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Creating our self-querying retriever\n", "## Creating our self-querying retriever\n",
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents." "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
] ]
}, },
@ -142,7 +142,7 @@
"id": "ea9df8d4", "id": "ea9df8d4",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Testing it out\n", "## Testing it out\n",
"And now we can try actually using our retriever!" "And now we can try actually using our retriever!"
] ]
}, },
@ -320,7 +320,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.3" "version": "3.9.1"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -36,13 +36,7 @@ class CSVLoader(BaseLoader):
self.file_path = file_path self.file_path = file_path
self.source_column = source_column self.source_column = source_column
self.encoding = encoding self.encoding = encoding
if csv_args is None: self.csv_args = csv_args or {}
self.csv_args = {
"delimiter": csv.Dialect.delimiter,
"quotechar": csv.Dialect.quotechar,
}
else:
self.csv_args = csv_args
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load data into document objects.""" """Load data into document objects."""