mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-04-28 11:36:25 +00:00
feat(vectordb): Milvus vector db Integration (#1996)
* integrate Milvus into Private GPT * adjust milvus settings * update doc info and reformat * adjust milvus initialization * adjust import error * mionr update * adjust format * adjust the db storing path * update doc
This commit is contained in:
parent
4523a30c8f
commit
43cc31f740
@ -44,6 +44,7 @@ will load the configuration from `settings.yaml` and `settings-ollama.yaml`.
|
||||
|
||||
## About Fully Local Setups
|
||||
In order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally.
|
||||
|
||||
### LLM
|
||||
For local LLM there are two options:
|
||||
* (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local LLMs.
|
||||
@ -63,4 +64,4 @@ In order for HuggingFace LLM to work (the second option), you need to download t
|
||||
poetry run python scripts/setup
|
||||
```
|
||||
### Vector stores
|
||||
The vector stores supported (Qdrant, ChromaDB and Postgres) run locally by default.
|
||||
The vector stores supported (Qdrant, Milvus, ChromaDB and Postgres) run locally by default.
|
@ -82,6 +82,7 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
|
||||
| **Option** | **Description** | **Extra** |
|
||||
|------------------|-----------------------------------------|-------------------------|
|
||||
| **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant |
|
||||
| milvus | Adds support for Milvus vector store | vector-stores-milvus |
|
||||
| chroma | Adds support for Chroma DB vector store | vector-stores-chroma |
|
||||
| postgres | Adds support for Postgres vector store | vector-stores-postgres |
|
||||
| clickhouse | Adds support for Clickhouse vector store| vector-stores-clickhouse|
|
||||
|
@ -1,6 +1,7 @@
|
||||
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers. Qdrant being the default.
|
||||
## Vectorstores
|
||||
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Milvus](https://milvus.io/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers, with Qdrant being the default.
|
||||
|
||||
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma`, `postgres` and `clickhouse`.
|
||||
To select a provider, set the `vectorstore.database` property in the `settings.yaml` file to one of `qdrant`, `milvus`, `chroma`, `postgres` or `clickhouse`.
|
||||
|
||||
```yaml
|
||||
vectorstore:
|
||||
@ -38,6 +39,24 @@ qdrant:
|
||||
path: local_data/private_gpt/qdrant
|
||||
```
|
||||
|
||||
### Milvus configuration
|
||||
|
||||
To enable Milvus, set the `vectorstore.database` property in the `settings.yaml` file to `milvus` and install the `vector-stores-milvus` extra.
|
||||
|
||||
```bash
|
||||
poetry install --extras vector-stores-milvus
|
||||
```
|
||||
|
||||
The available configuration options are:
|
||||
| Field | Description |
|
||||
|--------------|-------------|
|
||||
| uri | Defaults to `local_data/private_gpt/milvus/milvus_local.db`, a local file. You can also point it at a more performant Milvus server running on Docker or Kubernetes, e.g. `http://localhost:19530`. To use Zilliz Cloud, set `uri` and `token` to the endpoint and API key of your Zilliz Cloud instance. |
|
||||
| token | Access token used together with `uri` when connecting to a Milvus server on Docker or Kubernetes, or the API key when using Zilliz Cloud. |
|
||||
| collection_name | The name of the collection; set to `milvus_db` by default. |
|
||||
| overwrite | Whether to overwrite the data in the collection if it already exists. Defaults to `True`. |
|
||||
|
||||
To use a local, disk-based database without running a Milvus server, set the `uri` value in `settings.yaml` to a local file path such as `local_data/private_gpt/milvus/milvus_local.db`.
|
||||
|
||||
### Chroma configuration
|
||||
|
||||
To enable Chroma, set the `vectorstore.database` property in the `settings.yaml` file to `chroma` and install the `vector-stores-chroma` extra.
|
||||
|
82
poetry.lock
generated
82
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
@ -1182,6 +1182,27 @@ files = [
|
||||
dnspython = ">=2.0.0"
|
||||
idna = ">=2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "environs"
|
||||
version = "9.5.0"
|
||||
description = "simplified environment variable parsing"
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"},
|
||||
{file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=3.0.0"
|
||||
python-dotenv = "*"
|
||||
|
||||
[package.extras]
|
||||
dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"]
|
||||
django = ["dj-database-url", "dj-email-url", "django-cache-url"]
|
||||
lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"]
|
||||
tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "fastapi"
|
||||
version = "0.111.0"
|
||||
@ -2735,6 +2756,21 @@ files = [
|
||||
clickhouse-connect = ">=0.7.0,<0.8.0"
|
||||
llama-index-core = ">=0.10.5,<0.11.0"
|
||||
|
||||
[[package]]
|
||||
name = "llama-index-vector-stores-milvus"
|
||||
version = "0.1.20"
|
||||
description = "llama-index vector_stores milvus integration"
|
||||
optional = true
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "llama_index_vector_stores_milvus-0.1.20-py3-none-any.whl", hash = "sha256:27a61fd237e67b648f36964c2e25275df4cb20dd740d111f0b75db477259ef5b"},
|
||||
{file = "llama_index_vector_stores_milvus-0.1.20.tar.gz", hash = "sha256:461bccce036be7bb739e57eb3855f64557c506023febfc08f98899778d460602"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
llama-index-core = ">=0.10.1,<0.11.0"
|
||||
pymilvus = ">=2.3.6,<3.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "llama-index-vector-stores-postgres"
|
||||
version = "0.1.11"
|
||||
@ -2991,6 +3027,22 @@ files = [
|
||||
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "milvus-lite"
|
||||
version = "2.4.8"
|
||||
description = "A lightweight version of Milvus wrapped with Python."
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "milvus_lite-2.4.8-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:b7e90b34b214884cd44cdc112ab243d4cb197b775498355e2437b6cafea025fe"},
|
||||
{file = "milvus_lite-2.4.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:519dfc62709d8f642d98a1c5b1dcde7080d107e6e312d677fef5a3412a40ac08"},
|
||||
{file = "milvus_lite-2.4.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b21f36d24cbb0e920b4faad607019bb28c1b2c88b4d04680ac8c7697a4ae8a4d"},
|
||||
{file = "milvus_lite-2.4.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:08332a2b9abfe7c4e1d7926068937e46f8fb81f2707928b7bc02c9dc99cebe41"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
tqdm = "*"
|
||||
|
||||
[[package]]
|
||||
name = "minijinja"
|
||||
version = "2.0.1"
|
||||
@ -4578,6 +4630,31 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
|
||||
docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
|
||||
tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pymilvus"
|
||||
version = "2.4.4"
|
||||
description = "Python Sdk for Milvus"
|
||||
optional = true
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pymilvus-2.4.4-py3-none-any.whl", hash = "sha256:073b76bc36f6f4e70f0f0a0023a53324f0ba8ef9a60883f87cd30a44b6c6f2b5"},
|
||||
{file = "pymilvus-2.4.4.tar.gz", hash = "sha256:50c53eb103e034fbffe936fe942751ea3dbd2452e18cf79acc52360ed4987fb7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
environs = "<=9.5.0"
|
||||
grpcio = ">=1.49.1,<=1.63.0"
|
||||
milvus-lite = {version = ">=2.4.0,<2.5.0", markers = "sys_platform != \"win32\""}
|
||||
pandas = ">=1.2.4"
|
||||
protobuf = ">=3.20.0"
|
||||
setuptools = ">=67"
|
||||
ujson = ">=2.0.0"
|
||||
|
||||
[package.extras]
|
||||
bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"]
|
||||
dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"]
|
||||
model = ["milvus-model (>=0.1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.1.1"
|
||||
@ -6777,10 +6854,11 @@ storage-nodestore-postgres = ["asyncpg", "llama-index-storage-docstore-postgres"
|
||||
ui = ["gradio"]
|
||||
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
|
||||
vector-stores-clickhouse = ["clickhouse-connect", "llama-index-vector-stores-clickhouse"]
|
||||
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
|
||||
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
|
||||
vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.11,<3.12"
|
||||
content-hash = "5a2ffe28c38fe59d64fcbf2094b804da8e3f784dc42e1926eb7bd8bcd9dc6056"
|
||||
content-hash = "5e916cce1a7805965795dbaee0e2d24612e54305af4b1936d6bc1fa469b8012f"
|
||||
|
@ -121,6 +121,45 @@ class VectorStoreComponent:
|
||||
collection_name="make_this_parameterizable_per_api_call",
|
||||
), # TODO
|
||||
)
|
||||
|
||||
case "milvus":
|
||||
try:
|
||||
from llama_index.vector_stores.milvus import ( # type: ignore
|
||||
MilvusVectorStore,
|
||||
)
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Milvus dependencies not found, install with `poetry install --extras vector-stores-milvus`"
|
||||
) from e
|
||||
|
||||
if settings.milvus is None:
|
||||
logger.info(
|
||||
"Milvus config not found. Using default settings.\n"
|
||||
"Trying to connect to Milvus at local_data/private_gpt/milvus/milvus_local.db "
|
||||
"with collection 'make_this_parameterizable_per_api_call'."
|
||||
)
|
||||
|
||||
self.vector_store = typing.cast(
|
||||
BasePydanticVectorStore,
|
||||
MilvusVectorStore(
|
||||
dim=settings.embedding.embed_dim,
|
||||
collection_name="make_this_parameterizable_per_api_call",
|
||||
overwrite=True,
|
||||
),
|
||||
)
|
||||
|
||||
else:
|
||||
self.vector_store = typing.cast(
|
||||
BasePydanticVectorStore,
|
||||
MilvusVectorStore(
|
||||
dim=settings.embedding.embed_dim,
|
||||
uri=settings.milvus.uri,
|
||||
token=settings.milvus.token,
|
||||
collection_name=settings.milvus.collection_name,
|
||||
overwrite=settings.milvus.overwrite,
|
||||
),
|
||||
)
|
||||
|
||||
case "clickhouse":
|
||||
try:
|
||||
from clickhouse_connect import ( # type: ignore
|
||||
|
@ -125,7 +125,7 @@ class LLMSettings(BaseModel):
|
||||
|
||||
|
||||
class VectorstoreSettings(BaseModel):
|
||||
database: Literal["chroma", "qdrant", "postgres", "clickhouse"]
|
||||
database: Literal["chroma", "qdrant", "postgres", "clickhouse", "milvus"]
|
||||
|
||||
|
||||
class NodeStoreSettings(BaseModel):
|
||||
@ -508,6 +508,27 @@ class QdrantSettings(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
class MilvusSettings(BaseModel):
    # Options for the Milvus vector store: where to connect, how to
    # authenticate, which collection to use, and whether to overwrite it.

    # Location of the Milvus instance; the default is a local file path.
    uri: str = Field(
        default="local_data/private_gpt/milvus/milvus_local.db",
        description=(
            "The URI of the Milvus instance. "
            "For example: 'local_data/private_gpt/milvus/milvus_local.db' for Milvus Lite."
        ),
    )

    # Access token; empty by default.
    token: str = Field(
        default="",
        description=(
            "A valid access token to access the specified Milvus instance. This can be used as a "
            "recommended alternative to setting user and password separately. "
        ),
    )

    # Collection that holds the vectors.
    collection_name: str = Field(
        default="make_this_parameterizable_per_api_call",
        description=(
            "The name of the collection in Milvus. "
            "Default is 'make_this_parameterizable_per_api_call'."
        ),
    )

    # NOTE: defaults to True, i.e. overwriting is enabled unless configured otherwise.
    overwrite: bool = Field(
        default=True,
        description="Overwrite the previous collection schema if it exists.",
    )
|
||||
|
||||
|
||||
class Settings(BaseModel):
|
||||
server: ServerSettings
|
||||
data: DataSettings
|
||||
@ -527,6 +548,7 @@ class Settings(BaseModel):
|
||||
qdrant: QdrantSettings | None = None
|
||||
postgres: PostgresSettings | None = None
|
||||
clickhouse: ClickHouseSettings | None = None
|
||||
milvus: MilvusSettings | None = None
|
||||
|
||||
|
||||
"""
|
||||
|
@ -31,6 +31,7 @@ llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
|
||||
llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
|
||||
llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
|
||||
llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
|
||||
llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
|
||||
llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
|
||||
llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
|
||||
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
|
||||
@ -78,6 +79,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
|
||||
vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
|
||||
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
|
||||
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
|
||||
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
|
||||
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
|
||||
rerank-sentence-transformers = ["torch", "sentence-transformers"]
|
||||
|
||||
|
@ -85,6 +85,11 @@ vectorstore:
|
||||
nodestore:
|
||||
database: simple
|
||||
|
||||
milvus:
|
||||
uri: local_data/private_gpt/milvus/milvus_local.db
|
||||
collection_name: milvus_db
|
||||
overwrite: false
|
||||
|
||||
qdrant:
|
||||
path: local_data/private_gpt/qdrant
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user