mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-14 14:05:37 +00:00
community[minor]: Implemented Kinetica Document Loader and added notebooks (#20002)
- [ ] **Kinetica Document Loader**: "community: a class to load Documents from Kinetica" - [ ] **Kinetica Document Loader**: - **Description:** implemented KineticaLoader in `kinetica_loader.py` - **Dependencies:** install the Kinetica API using `pip install gpudb==7.2.0.1 `
This commit is contained in:
125
docs/docs/integrations/document_loaders/kinetica.ipynb
Normal file
125
docs/docs/integrations/document_loaders/kinetica.ipynb
Normal file
@@ -0,0 +1,125 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica\n",
|
||||
"\n",
|
||||
"This notebook goes over how to load documents from Kinetica."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install gpudb==7.2.0.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.kinetica_loader import KineticaLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Loading Environment Variables\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain_community.vectorstores import (\n",
|
||||
" KineticaSettings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Kinetica needs the connection to the database.\n",
|
||||
"# This is how to set it up.\n",
|
||||
"HOST = os.getenv(\"KINETICA_HOST\", \"http://127.0.0.1:9191\")\n",
|
||||
"USERNAME = os.getenv(\"KINETICA_USERNAME\", \"\")\n",
|
||||
"PASSWORD = os.getenv(\"KINETICA_PASSWORD\", \"\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def create_config() -> KineticaSettings:\n",
|
||||
" return KineticaSettings(host=HOST, username=USERNAME, password=PASSWORD)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.kinetica_loader import KineticaLoader\n",
|
||||
"\n",
|
||||
"# The following `QUERY` is an example which will not run; this\n",
|
||||
"# needs to be substituted with a valid `QUERY` that will return\n",
|
||||
"# data and the `SCHEMA.TABLE` combination must exist in Kinetica.\n",
|
||||
"\n",
|
||||
"QUERY = \"select text, survey_id from SCHEMA.TABLE limit 10\"\n",
|
||||
"kinetica_loader = KineticaLoader(\n",
|
||||
" QUERY,\n",
|
||||
" HOST,\n",
|
||||
" USERNAME,\n",
|
||||
" PASSWORD,\n",
|
||||
")\n",
|
||||
"kinetica_documents = kinetica_loader.load()\n",
|
||||
"print(kinetica_documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.kinetica_loader import KineticaLoader\n",
|
||||
"\n",
|
||||
"# The following `QUERY` is an example which will not run; this\n",
|
||||
"# needs to be substituted with a valid `QUERY` that will return\n",
|
||||
"# data and the `SCHEMA.TABLE` combination must exist in Kinetica.\n",
|
||||
"\n",
|
||||
"QUERY = \"select text, survey_id as source from SCHEMA.TABLE limit 10\"\n",
|
||||
"kinetica_loader = KineticaLoader(\n",
|
||||
" query=QUERY,\n",
|
||||
" host=HOST,\n",
|
||||
" username=USERNAME,\n",
|
||||
" password=PASSWORD,\n",
|
||||
" metadata_columns=[\"source\"],\n",
|
||||
")\n",
|
||||
"kinetica_documents = kinetica_loader.load()\n",
|
||||
"print(kinetica_documents)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -26,3 +26,19 @@ See [Kinetica Vectorsore API](/docs/integrations/vectorstores/kinetica) for usag
|
||||
from langchain_community.vectorstores import Kinetica
|
||||
```
|
||||
|
||||
## Document Loader
|
||||
|
||||
The Kinetica Document Loader can be used to load LangChain Documents from the
|
||||
Kinetica database.
|
||||
|
||||
See [Kinetica Document Loader](/docs/integrations/document_loaders/kinetica) for usage
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders.kinetica_loader import KineticaLoader
|
||||
```
|
||||
|
||||
## Retriever
|
||||
|
||||
The Kinetica Retriever can return documents given an unstructured query.
|
||||
|
||||
See [Kinetica VectorStore based Retriever](/docs/integrations/retrievers/kinetica) for usage
|
||||
|
171
docs/docs/integrations/retrievers/kinetica.ipynb
Normal file
171
docs/docs/integrations/retrievers/kinetica.ipynb
Normal file
@@ -0,0 +1,171 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica Vectorstore based Retriever\n",
|
||||
"\n",
|
||||
">[Kinetica](https://www.kinetica.com/) is a database with integrated support for vector similarity search\n",
|
||||
"\n",
|
||||
"It supports:\n",
|
||||
"- exact and approximate nearest neighbor search\n",
|
||||
"- L2 distance, inner product, and cosine distance\n",
|
||||
"\n",
|
||||
"This notebook shows how to use a retriever based on the Kinetica vector store (`Kinetica`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Please ensure that this connector is installed in your working environment.\n",
|
||||
"%pip install gpudb==7.2.0.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Loading Environment Variables\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import (\n",
|
||||
" Kinetica,\n",
|
||||
" KineticaSettings,\n",
|
||||
")\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Kinetica needs the connection to the database.\n",
|
||||
"# This is how to set it up.\n",
|
||||
"HOST = os.getenv(\"KINETICA_HOST\", \"http://127.0.0.1:9191\")\n",
|
||||
"USERNAME = os.getenv(\"KINETICA_USERNAME\", \"\")\n",
|
||||
"PASSWORD = os.getenv(\"KINETICA_PASSWORD\", \"\")\n",
|
||||
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def create_config() -> KineticaSettings:\n",
|
||||
" return KineticaSettings(host=HOST, username=USERNAME, password=PASSWORD)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Retriever from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"# The Kinetica Module will try to create a table with the name of the collection.\n",
|
||||
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
|
||||
"\n",
|
||||
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
|
||||
"connection = create_config()\n",
|
||||
"\n",
|
||||
"db = Kinetica.from_documents(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" documents=docs,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" config=connection,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create retriever from the vector store\n",
|
||||
"retriever = db.as_retriever(search_kwargs={\"k\": 2})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Search with retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = retriever.get_relevant_documents(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
")\n",
|
||||
"print(result[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Reference in New Issue
Block a user