
feat(nodestore): add Postgres for the doc and index store ()

* Adding Postgres for the doc and index store

* Adding documentation.  Rename postgres database local->simple.  Postgres storage dependencies

* Update documentation for postgres storage

* Renaming feature to nodestore

* update docstore -> nodestore in doc

* missed some docstore changes in doc

* Updated poetry.lock

* Formatting updates to pass ruff/black checks

* Correction to unreachable code!

* Format adjustment to pass black test

* Adjust extra inclusion name for vector pg

* extra dep change for pg vector

* storage-postgres -> storage-nodestore-postgres

* Hash change on poetry lock
Brett England 2024-03-14 12:12:33 -04:00 committed by GitHub
parent d17c34e81a
commit 68b3a34b03
9 changed files with 225 additions and 25 deletions

View File

@ -58,6 +58,8 @@ navigation:
      contents:
        - page: Vector Stores
          path: ./docs/pages/manual/vectordb.mdx
        - page: Node Stores
          path: ./docs/pages/manual/nodestore.mdx
    - section: Advanced Setup
      contents:
        - page: LLM Backends

View File

@ -0,0 +1,66 @@
## NodeStores
PrivateGPT supports **Simple** and [Postgres](https://www.postgresql.org/) providers, with Simple being the default.
In order to select one or the other, set the `nodestore.database` property in the `settings.yaml` file to `simple` or `postgres`.
```yaml
nodestore:
  database: simple
```
### Simple Document Store
The simple document store keeps data in memory and persists it to local disk.
It is an excellent choice for small projects or proofs of concept where you need to persist data while keeping setup complexity to a minimum. To get started, set the `nodestore.database` property in your `settings.yaml` file as follows:
```yaml
nodestore:
  database: simple
```
The simple document store requires no extra services or configuration, so it is easy to adopt. In-memory processing combined with disk persistence lets it handle small to medium-sized datasets efficiently while keeping data consistent across runs.
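As a rough illustration of that behaviour (not part of this change; paths and values are illustrative), the underlying llama-index stores write their state to the local data directory and reload it on the next run:
```python
from llama_index.core.schema import Document
from llama_index.core.storage.docstore import SimpleDocumentStore

# Build an in-memory store and add a document to it.
doc_store = SimpleDocumentStore()
doc_store.add_documents([Document(text="hello nodestore")])

# Flush the in-memory state to disk (illustrative path).
doc_store.persist(persist_path="local_data/private_gpt/docstore.json")

# A later run reloads the same state from the persist directory.
reloaded = SimpleDocumentStore.from_persist_dir(persist_dir="local_data/private_gpt")
print(len(reloaded.docs))  # 1
```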
### Postgres Document Store
To enable Postgres, set the `nodestore.database` property in the `settings.yaml` file to `postgres` and install the `storage-nodestore-postgres` extra. Note: vector embeddings storage in Postgres (PGVector) is configured separately.
```bash
poetry install --extras storage-nodestore-postgres
```
The available configuration options are:
| Field | Description |
|---------------|-----------------------------------------------------------|
| **host** | The server hosting the Postgres database. Default is `localhost` |
| **port** | The port on which the Postgres database is accessible. Default is `5432` |
| **database** | The specific database to connect to. Default is `postgres` |
| **user** | The username for database access. Default is `postgres` |
| **password** | The password for database access. (Required) |
| **schema_name** | The database schema to use. Default is `private_gpt` |
For example:
```yaml
nodestore:
  database: postgres

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: <PASSWORD>
  schema_name: private_gpt
```
Given the above configuration, two PostgreSQL tables will be created upon successful connection: one storing metadata related to the index and another storing the document data itself.
```
postgres=# \dt private_gpt.*
             List of relations
   Schema    |      Name       | Type  |  Owner
-------------+-----------------+-------+----------
 private_gpt | data_docstore   | table | postgres
 private_gpt | data_indexstore | table | postgres
postgres=#
```
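Under the hood, the configuration above is handed to the Postgres-backed llama-index stores. A rough sketch of the equivalent calls, mirroring what `NodeStoreComponent` does in this commit (values are illustrative):
```python
from llama_index.core.storage.docstore.postgres_docstore import PostgresDocumentStore
from llama_index.core.storage.index_store.postgres_index_store import PostgresIndexStore

pg_params = {
    "host": "localhost",
    "port": 5432,
    "database": "postgres",
    "user": "postgres",
    "password": "<PASSWORD>",
    "schema_name": "private_gpt",
}

# Each store creates and manages its own table inside the configured schema.
doc_store = PostgresDocumentStore.from_params(**pg_params)
index_store = PostgresIndexStore.from_params(**pg_params)
```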

View File

@ -51,10 +51,10 @@ By default `chroma` will use a disk-based database stored in local_data_path / "
### PGVector
To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `pgvector` extra.
To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `vector-stores-postgres` extra.
```bash
poetry install --extras pgvector
poetry install --extras vector-stores-postgres
```
PGVector settings can be configured by setting values to the `pgvector` property in the `settings.yaml` file.

poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
[[package]]
name = "aiofiles"
@ -2202,6 +2202,34 @@ llama-index-core = ">=0.10.1,<0.11.0"
pymupdf = ">=1.23.21,<2.0.0"
pypdf = ">=4.0.1,<5.0.0"
[[package]]
name = "llama-index-storage-docstore-postgres"
version = "0.1.2"
description = "llama-index docstore postgres integration"
optional = true
python-versions = ">=3.8.1,<4.0"
files = [
{file = "llama_index_storage_docstore_postgres-0.1.2-py3-none-any.whl", hash = "sha256:54c9534d26a641af85857452ce09279eddec27ca14c3a50c4481e95f394daa08"},
{file = "llama_index_storage_docstore_postgres-0.1.2.tar.gz", hash = "sha256:40f5ebd9b461023110343c478caf9ef96c30317dd077e8b156460dff1568dba7"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
[[package]]
name = "llama-index-storage-index-store-postgres"
version = "0.1.2"
description = "llama-index index_store postgres integration"
optional = true
python-versions = ">=3.8.1,<4.0"
files = [
{file = "llama_index_storage_index_store_postgres-0.1.2-py3-none-any.whl", hash = "sha256:8728c9cc5ce9312cf364e1cb1b65e0aba24321e20a16463d8f27f5a883b51b72"},
{file = "llama_index_storage_index_store_postgres-0.1.2.tar.gz", hash = "sha256:6a6af1ea6110b2b34de87acaf97c9615bbb738eb504fe89482fb6b973b07eb47"},
]
[package.dependencies]
llama-index-core = ">=0.10.1,<0.11.0"
[[package]]
name = "llama-index-vector-stores-chroma"
version = "0.1.4"
@ -4153,6 +4181,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -5918,6 +5947,7 @@ llms-ollama = ["llama-index-llms-ollama"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-sagemaker = ["boto3"]
storage-nodestore-postgres = ["asyncpg", "llama-index-storage-docstore-postgres", "llama-index-storage-index-store-postgres", "psycopg2-binary"]
ui = ["gradio"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
@ -5926,4 +5956,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "41849a9d15848a354fd4cc0ca9d752148e76fee64d8bb5b881210c2290fc8072"
content-hash = "689df29f4f2209e7ae6638563f4bb25700d1454098d0c728a164a708d42fa377"

View File

@ -6,6 +6,7 @@ from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.storage.index_store.types import BaseIndexStore
from private_gpt.paths import local_data_path
from private_gpt.settings.settings import Settings
logger = logging.getLogger(__name__)
@ -16,19 +17,51 @@ class NodeStoreComponent:
    doc_store: BaseDocumentStore

    @inject
    def __init__(self) -> None:
        try:
            self.index_store = SimpleIndexStore.from_persist_dir(
                persist_dir=str(local_data_path)
            )
        except FileNotFoundError:
            logger.debug("Local index store not found, creating a new one")
            self.index_store = SimpleIndexStore()
    def __init__(self, settings: Settings) -> None:
        match settings.nodestore.database:
            case "simple":
                try:
                    self.index_store = SimpleIndexStore.from_persist_dir(
                        persist_dir=str(local_data_path)
                    )
                except FileNotFoundError:
                    logger.debug("Local index store not found, creating a new one")
                    self.index_store = SimpleIndexStore()

        try:
            self.doc_store = SimpleDocumentStore.from_persist_dir(
                persist_dir=str(local_data_path)
            )
        except FileNotFoundError:
            logger.debug("Local document store not found, creating a new one")
            self.doc_store = SimpleDocumentStore()
                try:
                    self.doc_store = SimpleDocumentStore.from_persist_dir(
                        persist_dir=str(local_data_path)
                    )
                except FileNotFoundError:
                    logger.debug("Local document store not found, creating a new one")
                    self.doc_store = SimpleDocumentStore()

            case "postgres":
                try:
                    from llama_index.core.storage.docstore.postgres_docstore import (
                        PostgresDocumentStore,
                    )
                    from llama_index.core.storage.index_store.postgres_index_store import (
                        PostgresIndexStore,
                    )
                except ImportError:
                    raise ImportError(
                        "Postgres dependencies not found, install with `poetry install --extras storage-nodestore-postgres`"
                    ) from None
                if settings.postgres is None:
                    raise ValueError("Postgres index/doc store settings not found.")
                self.index_store = PostgresIndexStore.from_params(
                    **settings.postgres.model_dump(exclude_none=True)
                )
                self.doc_store = PostgresDocumentStore.from_params(
                    **settings.postgres.model_dump(exclude_none=True)
                )

            case _:
                # Should be unreachable
                # The settings validator should have caught this
                raise ValueError(
                    f"Database {settings.nodestore.database} not supported"
                )
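The `case _` branch is guarded upstream: `NodeStoreSettings.database` (added in `settings.py` below) is a `Literal`, so pydantic rejects unsupported values before the component is ever constructed. A minimal sketch, re-declaring the model here only for illustration:
```python
from typing import Literal

from pydantic import BaseModel, ValidationError


class NodeStoreSettings(BaseModel):
    database: Literal["simple", "postgres"]


try:
    NodeStoreSettings(database="redis")  # hypothetical unsupported value
except ValidationError as exc:
    # pydantic raises here, so NodeStoreComponent never reaches `case _`
    print(exc)
```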

View File

@ -108,6 +108,10 @@ class VectorstoreSettings(BaseModel):
    database: Literal["chroma", "qdrant", "pgvector"]


class NodeStoreSettings(BaseModel):
    database: Literal["simple", "postgres"]


class LlamaCPPSettings(BaseModel):
    llm_hf_repo_id: str
    llm_hf_model_file: str
@ -249,7 +253,7 @@ class UISettings(BaseModel):
    )


class PGVectorSettings(BaseModel):
class PostgresSettings(BaseModel):
    host: str = Field(
        "localhost",
        description="The server hosting the Postgres database",
@ -270,14 +274,17 @@ class PGVectorSettings(BaseModel):
"postgres",
description="The database to use to connect to the Postgres database",
)
schema_name: str = Field(
"public",
description="The name of the schema in the Postgres database to use",
)
class PGVectorSettings(PostgresSettings):
embed_dim: int = Field(
384,
description="The dimension of the embeddings stored in the Postgres database",
)
schema_name: str = Field(
"public",
description="The name of the schema in the Postgres database where the embeddings are stored",
)
table_name: str = Field(
"embeddings",
description="The name of the table in the Postgres database where the embeddings are stored",
@ -350,7 +357,9 @@ class Settings(BaseModel):
    openai: OpenAISettings
    ollama: OllamaSettings
    vectorstore: VectorstoreSettings
    nodestore: NodeStoreSettings
    qdrant: QdrantSettings | None = None
    postgres: PostgresSettings | None = None
    pgvector: PGVectorSettings | None = None
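A minimal sketch of what the refactor above achieves (field list abbreviated, not copied verbatim from the commit): `PGVectorSettings` now inherits the shared connection fields from `PostgresSettings`, and `model_dump(exclude_none=True)` is how the node store component turns the settings object into keyword arguments:
```python
from pydantic import BaseModel


class PostgresSettings(BaseModel):
    host: str = "localhost"
    port: int = 5432
    database: str = "postgres"
    user: str = "postgres"
    password: str  # required, no default
    schema_name: str = "public"


class PGVectorSettings(PostgresSettings):
    # Embedding-specific fields live only on the vector store settings.
    embed_dim: int = 384
    table_name: str = "embeddings"


pg = PostgresSettings(password="admin")
# This is the mapping NodeStoreComponent unpacks into the Postgres stores.
print(pg.model_dump(exclude_none=True))

vec = PGVectorSettings(password="admin", embed_dim=768)
print(vec.host, vec.table_name)  # connection fields are inherited
```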

View File

@ -27,6 +27,12 @@ llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.2", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}
# Optional Sagemaker dependency
boto3 = {version ="^1.34.51", optional = true}
# Optional UI
@ -46,7 +52,7 @@ embeddings-sagemaker = ["boto3"]
vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
[tool.poetry.group.dev.dependencies]
black = "^22"

settings-ollama-pg.yaml Normal file
View File

@ -0,0 +1,43 @@
# Using ollama and postgres for the vector, doc and index store. Ollama is also used for embeddings.
# To use it, install these extras:
# poetry install --extras "llms-ollama ui vector-stores-postgres embeddings-ollama storage-nodestore-postgres"
server:
  env_name: ${APP_ENV:ollama}

llm:
  mode: ollama
  max_new_tokens: 512
  context_window: 3900

embedding:
  mode: ollama

ollama:
  llm_model: mistral
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434

nodestore:
  database: postgres

vectorstore:
  database: pgvector

pgvector:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: admin
  embed_dim: 768
  schema_name: private_gpt
  table_name: embeddings

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: admin
  schema_name: private_gpt
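Assuming the standard profile mechanism, this file should be picked up by launching PrivateGPT with `PGPT_PROFILES=ollama-pg` set in the environment.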

View File

@ -62,6 +62,9 @@ huggingface:
vectorstore:
  database: qdrant

nodestore:
  database: simple

qdrant:
  path: local_data/private_gpt/qdrant
@ -75,6 +78,14 @@ pgvector:
  schema_name: private_gpt
  table_name: embeddings

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479