mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-04-28 03:32:18 +00:00
feat: unify settings for vector and nodestore connections to PostgreSQL (#1730)
* Unify pgvector and postgres connection settings * Remove local changes * Update file pgvector->postgres
This commit is contained in:
parent
68b3a34b03
commit
63de7e4930
@ -1,7 +1,7 @@
|
||||
## Vectorstores
|
||||
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [PGVector](https://github.com/pgvector/pgvector) as vectorstore providers, with Qdrant being the default.
|
||||
|
||||
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `pgvector`.
|
||||
To select a provider, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `postgres`.
|
||||
|
||||
```yaml
|
||||
vectorstore:
|
||||
@ -50,14 +50,15 @@ poetry install --extras chroma
|
||||
By default `chroma` will use a disk-based database stored in `local_data_path / "chroma_db"` (where `local_data_path` is defined in `settings.yaml`).
|
||||
|
||||
### PGVector
|
||||
To use the PGVector store, a [PostgreSQL](https://www.postgresql.org/) database with the PGVector extension installed is required.
|
||||
|
||||
To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `vector-stores-postgres` extra.
|
||||
To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `postgres` and install the `vector-stores-postgres` extra.
|
||||
|
||||
```bash
|
||||
poetry install --extras vector-stores-postgres
|
||||
```
|
||||
|
||||
PGVector settings can be configured by setting values to the `pgvector` property in the `settings.yaml` file.
|
||||
PGVector settings can be configured by setting values to the `postgres` property in the `settings.yaml` file.
|
||||
|
||||
The available configuration options are:
|
||||
| Field | Description |
|
||||
@ -67,19 +68,36 @@ The available configuration options are:
|
||||
| **database** | The specific database to connect to. Default is `postgres` |
|
||||
| **user** | The username for database access. Default is `postgres` |
|
||||
| **password** | The password for database access. (Required) |
|
||||
| **embed_dim** | The dimensionality of the embedding model (Required) |
|
||||
| **schema_name** | The database schema to use. Default is `private_gpt` |
|
||||
| **table_name** | The database table to use. Default is `embeddings` |
|
||||
|
||||
For example:
|
||||
```yaml
|
||||
pgvector:
|
||||
vectorstore:
|
||||
database: postgres
|
||||
|
||||
postgres:
|
||||
host: localhost
|
||||
port: 5432
|
||||
database: postgres
|
||||
user: postgres
|
||||
password: <PASSWORD>
|
||||
embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
|
||||
schema_name: private_gpt
|
||||
table_name: embeddings
|
||||
```
|
||||
|
||||
The following table will be created in the database
|
||||
```
|
||||
postgres=# \d private_gpt.data_embeddings
|
||||
Table "private_gpt.data_embeddings"
|
||||
Column | Type | Collation | Nullable | Default
|
||||
-----------+-------------------+-----------+----------+---------------------------------------------------------
|
||||
id | bigint | | not null | nextval('private_gpt.data_embeddings_id_seq'::regclass)
|
||||
text | character varying | | not null |
|
||||
metadata_ | json | | |
|
||||
node_id | character varying | | |
|
||||
embedding | vector(768) | | |
|
||||
Indexes:
|
||||
"data_embeddings_pkey" PRIMARY KEY, btree (id)
|
||||
|
||||
postgres=#
|
||||
```
|
||||
The dimension of the `embedding` column will be set based on the `embedding.embed_dim` value. If the embedding model changes, this table may need to be dropped and recreated to avoid a dimension mismatch.
|
||||
|
@ -38,7 +38,7 @@ class VectorStoreComponent:
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
self.settings = settings
|
||||
match settings.vectorstore.database:
|
||||
case "pgvector":
|
||||
case "postgres":
|
||||
try:
|
||||
from llama_index.vector_stores.postgres import ( # type: ignore
|
||||
PGVectorStore,
|
||||
@ -48,15 +48,17 @@ class VectorStoreComponent:
|
||||
"Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`"
|
||||
) from e
|
||||
|
||||
if settings.pgvector is None:
|
||||
if settings.postgres is None:
|
||||
raise ValueError(
|
||||
"PGVectorStore settings not found. Please provide settings."
|
||||
"Postgres settings not found. Please provide settings."
|
||||
)
|
||||
|
||||
self.vector_store = typing.cast(
|
||||
VectorStore,
|
||||
PGVectorStore.from_params(
|
||||
**settings.pgvector.model_dump(exclude_none=True)
|
||||
**settings.postgres.model_dump(exclude_none=True),
|
||||
table_name="embeddings",
|
||||
embed_dim=settings.embedding.embed_dim,
|
||||
),
|
||||
)
|
||||
|
||||
|
@ -105,7 +105,7 @@ class LLMSettings(BaseModel):
|
||||
|
||||
|
||||
class VectorstoreSettings(BaseModel):
|
||||
database: Literal["chroma", "qdrant", "pgvector"]
|
||||
database: Literal["chroma", "qdrant", "postgres"]
|
||||
|
||||
|
||||
class NodeStoreSettings(BaseModel):
|
||||
@ -177,6 +177,10 @@ class EmbeddingSettings(BaseModel):
|
||||
"Do not set it higher than your number of threads of your CPU."
|
||||
),
|
||||
)
|
||||
embed_dim: int = Field(
|
||||
384,
|
||||
description="The dimension of the embeddings stored in the Postgres database",
|
||||
)
|
||||
|
||||
|
||||
class SagemakerSettings(BaseModel):
|
||||
@ -280,17 +284,6 @@ class PostgresSettings(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
class PGVectorSettings(PostgresSettings):
|
||||
embed_dim: int = Field(
|
||||
384,
|
||||
description="The dimension of the embeddings stored in the Postgres database",
|
||||
)
|
||||
table_name: str = Field(
|
||||
"embeddings",
|
||||
description="The name of the table in the Postgres database where the embeddings are stored",
|
||||
)
|
||||
|
||||
|
||||
class QdrantSettings(BaseModel):
|
||||
location: str | None = Field(
|
||||
None,
|
||||
@ -360,7 +353,6 @@ class Settings(BaseModel):
|
||||
nodestore: NodeStoreSettings
|
||||
qdrant: QdrantSettings | None = None
|
||||
postgres: PostgresSettings | None = None
|
||||
pgvector: PGVectorSettings | None = None
|
||||
|
||||
|
||||
"""
|
||||
|
@ -11,6 +11,7 @@ llm:
|
||||
|
||||
embedding:
|
||||
mode: ollama
|
||||
embed_dim: 768
|
||||
|
||||
ollama:
|
||||
llm_model: mistral
|
||||
@ -21,17 +22,7 @@ nodestore:
|
||||
database: postgres
|
||||
|
||||
vectorstore:
|
||||
database: pgvector
|
||||
|
||||
pgvector:
|
||||
host: localhost
|
||||
port: 5432
|
||||
database: postgres
|
||||
user: postgres
|
||||
password: admin
|
||||
embed_dim: 768
|
||||
schema_name: private_gpt
|
||||
table_name: embeddings
|
||||
|
||||
postgres:
|
||||
host: localhost
|
||||
|
@ -55,6 +55,7 @@ embedding:
|
||||
# Should be matching the value above in most cases
|
||||
mode: huggingface
|
||||
ingest_mode: simple
|
||||
embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
|
||||
|
||||
huggingface:
|
||||
embedding_hf_model_name: BAAI/bge-small-en-v1.5
|
||||
@ -68,16 +69,6 @@ nodestore:
|
||||
qdrant:
|
||||
path: local_data/private_gpt/qdrant
|
||||
|
||||
pgvector:
|
||||
host: localhost
|
||||
port: 5432
|
||||
database: postgres
|
||||
user: postgres
|
||||
password: postgres
|
||||
embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
|
||||
schema_name: private_gpt
|
||||
table_name: embeddings
|
||||
|
||||
postgres:
|
||||
host: localhost
|
||||
port: 5432
|
||||
|
Loading…
Reference in New Issue
Block a user