Updated to use the new OpenHermes LLM and the BAAI/bge-large embedding model

2025-08-28 12:01:29 +00:00 · 2024-03-31 15:36:52 +05:45 · 2024-03-31 15:36:52 +05:45 · 542ed0ef4e
commit 542ed0ef4e
parent cc786cf1de
14 changed files with 22 additions and 16 deletions
--- a/.env
+++ b/.env
@ -4,8 +4,8 @@ ENVIRONMENT=dev
 DB_HOST=localhost
 DB_USER=postgres
 DB_PORT=5432
-DB_PASSWORD=quick
+DB_PASSWORD=admin
-DB_NAME=QuickGpt
+DB_NAME=openai
 SUPER_ADMIN_EMAIL=superadmin@email.com
 SUPER_ADMIN_PASSWORD=supersecretpassword
--- a/.gitignore
+++ b/.gitignore
@ -36,3 +36,4 @@ __pycache__/
 *.docx
 *.png
--- a/local_data/.gitignore
+++ b/local_data/.gitignore
@ -0,0 +1,2 @@
 *
 !.gitignore
--- a/local_data/private_gpt/docstore.json
+++ b/local_data/private_gpt/docstore.json
--- a/local_data/private_gpt/graph_store.json
+++ b/local_data/private_gpt/graph_store.json
@ -1 +0,0 @@
 {"graph_dict": {}}
--- a/local_data/private_gpt/image__vector_store.json
+++ b/local_data/private_gpt/image__vector_store.json
@ -1 +0,0 @@
 {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}
--- a/local_data/private_gpt/index_store.json
+++ b/local_data/private_gpt/index_store.json
--- a/local_data/private_gpt/qdrant/.lock
+++ b/local_data/private_gpt/qdrant/.lock
@ -1 +0,0 @@
 tmp lock file
--- a/local_data/private_gpt/qdrant/collection/make_this_parameterizable_per_api_call/storage.sqlite
+++ b/local_data/private_gpt/qdrant/collection/make_this_parameterizable_per_api_call/storage.sqlite
--- a/local_data/private_gpt/qdrant/meta.json
+++ b/local_data/private_gpt/qdrant/meta.json
@ -1 +1 @@
-{"collections": {"make_this_parameterizable_per_api_call": {"vectors": {"size": 768, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null}}, "aliases": {}}
+{"collections": {"make_this_parameterizable_per_api_call": {"vectors": {"size": 1024, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null}}, "aliases": {}}
--- a/models/.gitignore
+++ b/models/.gitignore
@ -0,0 +1,2 @@
 *
 !.gitignore
--- a/private_gpt/server/ingest/ingest_router.py
+++ b/private_gpt/server/ingest/ingest_router.py
@ -211,6 +211,7 @@ async def create_documents(
    )
    print("DOCUMENT CREATE: ", docs_in)
    document = crud.documents.create(db=db, obj_in=docs_in)
    department_ids = department_ids if department_ids else "1"
    department_ids = [int(number) for number in department_ids.split(",")]
    for department_id in department_ids:
        db.execute(models.document_department_association.insert().values(document_id=document.id, department_id=department_id))
--- a/private_gpt/users/api/v1/routers/users.py
+++ b/private_gpt/users/api/v1/routers/users.py
@ -161,7 +161,8 @@ def read_user_me(
        username=current_user.username,
        company_id = current_user.company_id,
        department_id=current_user.department_id,
-        role =role
+        role =role,
        checker=current_user.checker
    )
    return JSONResponse(
        status_code=status.HTTP_200_OK,
--- a/settings.yaml
+++ b/settings.yaml
@ -41,14 +41,16 @@ llm:
  max_new_tokens: 512
  context_window: 3900
  temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
-
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
 llamacpp:
-  prompt_style: "mistral"
+  prompt_style: "chatml"
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
+  llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
+  llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf
  tfs_z: 1.0            # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
  top_k: 40             # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
-  top_p: 1.0            # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
+  top_p: 0.9            # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_last_n: 64     # Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)
  repeat_penalty: 1.1   # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
 embedding:
@ -57,7 +59,7 @@ embedding:
  ingest_mode: simple
 huggingface:
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_model_name: BAAI/bge-large-en-v1.5
 vectorstore:
  database: qdrant
@ -71,7 +73,7 @@ pgvector:
  database: postgres
  user: postgres
  password: postgres
-  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
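+  embed_dim: 768 # NOTE(review): 384 was for BAAI/bge-small-en-v1.5; BAAI/bge-large-en-v1.5 (set under huggingface above) emits 1024-dim vectors, matching the qdrant collection size — confirm this value before using pgvector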
  schema_name: private_gpt
  table_name: embeddings
`@ -36,3 +36,4 @@ __pycache__/`
	`*.docx`	`*.docx`

	`*.png`	`*.png`
		`@ -1 +0,0 @@`
			`{"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}}`
		`@ -1 +1 @@`
			`{"collections": {"make_this_parameterizable_per_api_call": {"vectors": {"size": 768, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null}}, "aliases": {}}`				`{"collections": {"make_this_parameterizable_per_api_call": {"vectors": {"size": 1024, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "init_from": null, "quantization_config": null, "sparse_vectors": null}}, "aliases": {}}`