mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-08-28 12:01:29 +00:00
Added ClickHouse vector store support
This commit is contained in:
parent
c1802e7cf0
commit
117a548117
@ -1,7 +1,7 @@
|
||||
## Vectorstores
|
||||
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [PGVector](https://github.com/pgvector/pgvector) as vectorstore providers. Qdrant being the default.
|
||||
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers. Qdrant being the default.
|
||||
|
||||
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `postgres`.
|
||||
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma`, `postgres` or `clickhouse`.
|
||||
|
||||
```yaml
|
||||
vectorstore:
|
||||
@ -101,3 +101,69 @@ Indexes:
|
||||
postgres=#
|
||||
```
|
||||
The dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value. If the embedding model changes this table may need to be dropped and recreated to avoid a dimension mismatch.
|
||||
|
||||
### ClickHouse
|
||||
|
||||
To use ClickHouse as the vector store, a [ClickHouse](https://github.com/ClickHouse/ClickHouse) database must be available.
|
||||
|
||||
To enable ClickHouse, set the `vectorstore.database` property in the `settings.yaml` file to `clickhouse` and install the `vector-stores-clickhouse` extra.
|
||||
|
||||
```bash
|
||||
poetry install --extras vector-stores-clickhouse
|
||||
```
|
||||
|
||||
ClickHouse settings can be configured by setting values under the `clickhouse` property in the `settings.yaml` file.
|
||||
|
||||
The available configuration options are:
|
||||
| Field | Description |
|
||||
|----------------------|----------------------------------------------------------------|
|
||||
| **host** | The server hosting the ClickHouse database. Default is `localhost` |
|
||||
| **port** | The port on which the ClickHouse database is accessible. Default is `8123` |
|
||||
| **username** | The username for database access. Default is `default` |
|
||||
| **password** | The password for database access. (Optional) |
|
||||
| **database** | The specific database to connect to. Default is `__default__` |
|
||||
| **secure** | Use https/TLS for secure connection to the server. Default is `false` |
|
||||
| **interface** | The protocol used for the connection, either 'http' or 'https'. (Optional) |
|
||||
| **settings** | Specific ClickHouse server settings to be used with the session. (Optional) |
|
||||
| **connect_timeout** | Timeout in seconds for establishing a connection. (Optional) |
|
||||
| **send_receive_timeout** | Read timeout in seconds for http connection. (Optional) |
|
||||
| **verify** | Verify the server certificate in secure/https mode. (Optional) |
|
||||
| **ca_cert** | Path to Certificate Authority root certificate (.pem format). (Optional) |
|
||||
| **client_cert** | Path to TLS Client certificate (.pem format). (Optional) |
|
||||
| **client_cert_key** | Path to the private key for the TLS Client certificate. (Optional) |
|
||||
| **http_proxy** | HTTP proxy address. (Optional) |
|
||||
| **https_proxy** | HTTPS proxy address. (Optional) |
|
||||
| **server_host_name** | Server host name to be checked against the TLS certificate. (Optional) |
|
||||
|
||||
For example:
|
||||
```yaml
|
||||
vectorstore:
|
||||
database: clickhouse
|
||||
|
||||
clickhouse:
|
||||
host: localhost
|
||||
port: 8443
|
||||
username: admin
|
||||
password: <PASSWORD>
|
||||
database: embeddings
|
||||
secure: false
|
||||
```
|
||||
|
||||
The following table will be created in the database:
|
||||
```
|
||||
clickhouse-client
|
||||
:) \d embeddings.llama_index
|
||||
Table "llama_index"
|
||||
№ | name | type | default_type | default_expression | comment | codec_expression | ttl_expression
|
||||
----|-----------|----------------------------------------------|--------------|--------------------|---------|------------------|---------------
|
||||
1 | id | String | | | | |
|
||||
2 | doc_id | String | | | | |
|
||||
3 | text | String | | | | |
|
||||
4 | vector | Array(Float32) | | | | |
|
||||
5 | node_info | Tuple(start Nullable(UInt64), end Nullable(UInt64)) | | | | |
|
||||
6 | metadata | String | | | | |
|
||||
|
||||
clickhouse-client
|
||||
```
|
||||
|
||||
The dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value. If the embedding model changes, this table may need to be dropped and recreated to avoid a dimension mismatch.
|
||||
|
@ -121,6 +121,25 @@ class VectorStoreComponent:
|
||||
collection_name="make_this_parameterizable_per_api_call",
|
||||
), # TODO
|
||||
)
|
||||
case "clickhouse":
|
||||
try:
|
||||
from llama_index.vector_stores.clickhouse import ClickHouseVectorStore
|
||||
from clickhouse_connect import get_client
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"ClickHouse dependencies not found, install with `poetry install --extras vector-stores-clickhouse`"
|
||||
) from e
|
||||
|
||||
if settings.clickhouse is None:
|
||||
raise ValueError("ClickHouse settings not found. Please provide settings.")
|
||||
|
||||
clickhouse_client = get_client(
|
||||
host=settings.clickhouse.host,
|
||||
port=settings.clickhouse.port,
|
||||
username=settings.clickhouse.username,
|
||||
password=settings.clickhouse.password,
|
||||
)
|
||||
self.vector_store = ClickHouseVectorStore(clickhouse_client=clickhouse_client)
|
||||
case _:
|
||||
# Should be unreachable
|
||||
# The settings validator should have caught this
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Literal
|
||||
from typing import Literal, Optional, Dict, Any, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -107,7 +107,7 @@ class LLMSettings(BaseModel):
|
||||
|
||||
|
||||
class VectorstoreSettings(BaseModel):
|
||||
database: Literal["chroma", "qdrant", "postgres"]
|
||||
database: Literal["chroma", "qdrant", "postgres", "clickhouse"]
|
||||
|
||||
|
||||
class NodeStoreSettings(BaseModel):
|
||||
@ -323,6 +323,77 @@ class RagSettings(BaseModel):
|
||||
rerank: RerankSettings
|
||||
|
||||
|
||||
class ClickHouseSettings(BaseModel):
    """Connection options for the ClickHouse database used as a vector store.

    Every field declares a default, so a partially filled ``clickhouse``
    section in the settings file is accepted.
    """

    # --- Endpoint and credentials ---
    host: str = Field(
        default="localhost",
        description="The server hosting the ClickHouse database",
    )
    port: int = Field(
        default=8123,
        description="The port on which the ClickHouse database is accessible",
    )
    username: str = Field(
        default="default",
        description="The username to use to connect to the ClickHouse database",
    )
    password: str = Field(
        default="",
        description="The password to use to connect to the ClickHouse database",
    )
    database: str = Field(
        default="__default__",
        description="The default database to use for connections",
    )

    # --- Protocol selection and session behaviour ---
    secure: Union[bool, str] = Field(
        default=False,
        description="Use https/TLS for secure connection to the server",
    )
    interface: Optional[str] = Field(
        default=None,
        description="Must be either 'http' or 'https'. Determines the protocol to use for the connection",
    )
    settings: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Specific ClickHouse server settings to be used with the session",
    )
    connect_timeout: Optional[int] = Field(
        default=None,
        description="Timeout in seconds for establishing a connection",
    )
    send_receive_timeout: Optional[int] = Field(
        default=None,
        description="Read timeout in seconds for http connection",
    )

    # --- TLS certificates ---
    verify: Optional[bool] = Field(
        default=None,
        description="Verify the server certificate in secure/https mode",
    )
    ca_cert: Optional[str] = Field(
        default=None,
        description="Path to Certificate Authority root certificate (.pem format)",
    )
    client_cert: Optional[str] = Field(
        default=None,
        description="Path to TLS Client certificate (.pem format)",
    )
    client_cert_key: Optional[str] = Field(
        default=None,
        description="Path to the private key for the TLS Client certificate",
    )

    # --- Proxies and host-name checking ---
    http_proxy: Optional[str] = Field(
        default=None,
        description="HTTP proxy address",
    )
    https_proxy: Optional[str] = Field(
        default=None,
        description="HTTPS proxy address",
    )
    server_host_name: Optional[str] = Field(
        default=None,
        description="Server host name to be checked against the TLS certificate",
    )
|
||||
|
||||
|
||||
class PostgresSettings(BaseModel):
|
||||
host: str = Field(
|
||||
"localhost",
|
||||
@ -421,6 +492,7 @@ class Settings(BaseModel):
|
||||
rag: RagSettings
|
||||
qdrant: QdrantSettings | None = None
|
||||
postgres: PostgresSettings | None = None
|
||||
clickhouse: ClickHouseSettings | None = None
|
||||
|
||||
|
||||
"""
|
||||
|
@ -31,12 +31,16 @@ llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
|
||||
llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
|
||||
llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true}
|
||||
llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
|
||||
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
|
||||
llama-index-storage-docstore-postgres = {version ="^0.1.2", optional = true}
|
||||
llama-index-storage-index-store-postgres = {version ="^0.1.2", optional = true}
|
||||
# Postgres
|
||||
psycopg2-binary = {version ="^2.9.9", optional = true}
|
||||
asyncpg = {version="^0.29.0", optional = true}
|
||||
|
||||
# ClickHouse
|
||||
clickhouse-connect = {version = "^0.7.8", optional = true}
|
||||
|
||||
# Optional Sagemaker dependency
|
||||
boto3 = {version ="^1.34.51", optional = true}
|
||||
|
||||
@ -61,6 +65,7 @@ embeddings-openai = ["llama-index-embeddings-openai"]
|
||||
embeddings-sagemaker = ["boto3"]
|
||||
embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
|
||||
vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
|
||||
vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
|
||||
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
|
||||
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
|
||||
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
|
||||
|
@ -52,6 +52,13 @@ rag:
|
||||
model: cross-encoder/ms-marco-MiniLM-L-2-v2
|
||||
top_n: 1
|
||||
|
||||
clickhouse:
|
||||
host: localhost
|
||||
port: 8443
|
||||
username: admin
|
||||
password: clickhouse
|
||||
database: embeddings
|
||||
|
||||
llamacpp:
|
||||
prompt_style: "mistral"
|
||||
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
|
||||
|
Loading…
Reference in New Issue
Block a user