feat: unify settings for vector and nodestore connections to PostgreSQL (#1730)

* Unify pgvector and postgres connection settings * Remove local changes * Update file pgvector->postgres
2025-04-28 03:32:18 +00:00 · 2024-03-15 04:55:17 -04:00 · 2024-03-15 04:55:17 -04:00 · 63de7e4930
commit 63de7e4930
parent 68b3a34b03
5 changed files with 39 additions and 45 deletions
--- a/fern/docs/pages/manual/vectordb.mdx
+++ b/fern/docs/pages/manual/vectordb.mdx
@ -1,7 +1,7 @@
 ## Vectorstores
 PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [PGVector](https://github.com/pgvector/pgvector) as vectorstore providers. Qdrant being the default.

-In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `pgvector`.
+In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `postgres`.

 ```yaml
 vectorstore:
@ -50,14 +50,15 @@ poetry install --extras chroma
 By default `chroma` will use a disk-based database stored in local_data_path / "chroma_db" (being local_data_path defined in settings.yaml)

 ### PGVector
+To use the PGVector store, a [PostgreSQL](https://www.postgresql.org/) database with the PGVector extension installed is required.

-To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `vector-stores-postgres` extra.
+To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `postgres` and install the `vector-stores-postgres` extra.

 ```bash
 poetry install --extras vector-stores-postgres
 ```

-PGVector settings can be configured by setting values to the `pgvector` property in the `settings.yaml` file.
+PGVector settings can be configured by setting values to the `postgres` property in the `settings.yaml` file.

 The available configuration options are:
 | Field         | Description                                               |
@ -67,19 +68,36 @@ The available configuration options are:
 | **database**  | The specific database to connect to. Default is `postgres` |
 | **user**      | The username for database access. Default is `postgres` |
 | **password**  | The password for database access. (Required)            |
-| **embed_dim** | The dimensionality of the embedding model (Required)    |
 | **schema_name** | The database schema to use. Default is `private_gpt`       |
-| **table_name** | The database table to use. Default is `embeddings`    |

 For example:
 ```yaml
-pgvector:
+vectorstore:
+  database: postgres
+
+postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: <PASSWORD>
-  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
  schema_name: private_gpt
-  table_name: embeddings
 ```
+
+The following table will be created in the database:
+```
+postgres=# \d private_gpt.data_embeddings
+                                      Table "private_gpt.data_embeddings"
+  Column   |       Type        | Collation | Nullable |                         Default
+-----------+-------------------+-----------+----------+---------------------------------------------------------
+ id        | bigint            |           | not null | nextval('private_gpt.data_embeddings_id_seq'::regclass)
+ text      | character varying |           | not null |
+ metadata_ | json              |           |          |
+ node_id   | character varying |           |          |
+ embedding | vector(768)       |           |          |
+Indexes:
+    "data_embeddings_pkey" PRIMARY KEY, btree (id)
+
+postgres=# 
+```
+The dimension of the `embedding` column is set from the `embedding.embed_dim` value. If the embedding model changes, this table may need to be dropped and recreated to avoid a dimension mismatch.
--- a/private_gpt/components/vector_store/vector_store_component.py
+++ b/private_gpt/components/vector_store/vector_store_component.py
@ -38,7 +38,7 @@ class VectorStoreComponent:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        match settings.vectorstore.database:
-            case "pgvector":
+            case "postgres":
                try:
                    from llama_index.vector_stores.postgres import (  # type: ignore
                        PGVectorStore,
@ -48,15 +48,17 @@ class VectorStoreComponent:
                        "Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`"
                    ) from e

-                if settings.pgvector is None:
+                if settings.postgres is None:
                    raise ValueError(
-                        "PGVectorStore settings not found. Please provide settings."
+                        "Postgres settings not found. Please provide settings."
                    )

                self.vector_store = typing.cast(
                    VectorStore,
                    PGVectorStore.from_params(
-                        **settings.pgvector.model_dump(exclude_none=True)
+                        **settings.postgres.model_dump(exclude_none=True),
+                        table_name="embeddings",
+                        embed_dim=settings.embedding.embed_dim,
                    ),
                )

--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@ -105,7 +105,7 @@ class LLMSettings(BaseModel):


 class VectorstoreSettings(BaseModel):
-    database: Literal["chroma", "qdrant", "pgvector"]
+    database: Literal["chroma", "qdrant", "postgres"]


 class NodeStoreSettings(BaseModel):
@ -177,6 +177,10 @@ class EmbeddingSettings(BaseModel):
            "Do not set it higher than your number of threads of your CPU."
        ),
    )
+    embed_dim: int = Field(
+        384,
+        description="The dimension of the embeddings stored in the Postgres database",
+    )


 class SagemakerSettings(BaseModel):
@ -280,17 +284,6 @@ class PostgresSettings(BaseModel):
    )


-class PGVectorSettings(PostgresSettings):
-    embed_dim: int = Field(
-        384,
-        description="The dimension of the embeddings stored in the Postgres database",
-    )
-    table_name: str = Field(
-        "embeddings",
-        description="The name of the table in the Postgres database where the embeddings are stored",
-    )
-
-
 class QdrantSettings(BaseModel):
    location: str | None = Field(
        None,
@ -360,7 +353,6 @@ class Settings(BaseModel):
    nodestore: NodeStoreSettings
    qdrant: QdrantSettings | None = None
    postgres: PostgresSettings | None = None
-    pgvector: PGVectorSettings | None = None


 """
--- a/settings-ollama-pg.yaml
+++ b/settings-ollama-pg.yaml
@ -11,6 +11,7 @@ llm:

 embedding:
  mode: ollama
+  embed_dim: 768

 ollama:
  llm_model: mistral
@ -21,17 +22,7 @@ nodestore:
  database: postgres

 vectorstore:
-  database: pgvector
-
-pgvector:
-  host: localhost
-  port: 5432
  database: postgres
-  user: postgres
-  password: admin
-  embed_dim: 768
-  schema_name: private_gpt
-  table_name: embeddings

 postgres:
  host: localhost
--- a/settings.yaml
+++ b/settings.yaml
@ -55,6 +55,7 @@ embedding:
  # Should be matching the value above in most cases
  mode: huggingface
  ingest_mode: simple
+  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5

 huggingface:
  embedding_hf_model_name: BAAI/bge-small-en-v1.5
@ -68,16 +69,6 @@ nodestore:
 qdrant:
  path: local_data/private_gpt/qdrant

-pgvector:
-  host: localhost
-  port: 5432
-  database: postgres
-  user: postgres
-  password: postgres
-  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
-  schema_name: private_gpt
-  table_name: embeddings
-
 postgres:
  host: localhost
  port: 5432