Merge remote-tracking branch 'origin/main' into load-context-template-settings
@@ -29,7 +29,7 @@ RUN mkdir local_data; chown worker local_data
 RUN mkdir models; chown worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
-COPY --chown=worker docs/ docs
+COPY --chown=worker fern/ fern
 COPY --chown=worker *.yaml *.md ./

 USER worker
@@ -40,7 +40,7 @@ RUN mkdir local_data; chown worker local_data
 RUN mkdir models; chown worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
-COPY --chown=worker docs/ docs
+COPY --chown=worker fern/ fern
 COPY --chown=worker *.yaml *.md ./

 USER worker
@@ -1 +1,14 @@
+# API Reference
+
+The API is divided into two logical blocks:
+
+1. High-level API, abstracting all the complexity of a RAG (Retrieval Augmented Generation) pipeline implementation:
+    - Ingestion of documents: internally managing document parsing, splitting, metadata extraction,
+      embedding generation and storage.
+    - Chat & Completions using context from ingested documents: abstracting the retrieval of context, the prompt
+      engineering and the response generation.
+
+2. Low-level API, allowing advanced users to implement their own complex pipelines:
+    - Embeddings generation: based on a piece of text.
+    - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested
+      documents.
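To make the high-level block above concrete, here is a minimal sketch of the ingest-then-chat flow. It assumes a PrivateGPT server listening on http://localhost:8001, the third-party requests package, and a local story.txt file; the use_context flag and the doc_id field follow the OpenAPI spec included later in this commit, so treat the exact field names as assumptions rather than a definitive client.

import requests

BASE_URL = "http://localhost:8001"  # assumed default bind address

# High-level block, step 1: ingest a document; parsing, splitting,
# embedding generation and storage all happen server-side.
with open("story.txt", "rb") as f:
    ingested = requests.post(f"{BASE_URL}/v1/ingest/file", files={"file": f}).json()
print("ingested doc ids:", [doc["doc_id"] for doc in ingested["data"]])

# High-level block, step 2: chat using the ingested documents as context.
completion = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "What is the story about?"}],
        "use_context": True,
    },
).json()
print(completion["choices"][0]["message"]["content"])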
@@ -32,21 +32,6 @@ The installation guide will help you in the [Installation section](/installation
 />
 </Cards>

-## API Organization
-
-The API is divided in two logical blocks:
-
-1. High-level API, abstracting all the complexity of a RAG (Retrieval Augmented Generation) pipeline implementation:
-    - Ingestion of documents: internally managing document parsing, splitting, metadata extraction,
-      embedding generation and storage.
-    - Chat & Completions using context from ingested documents: abstracting the retrieval of context, the prompt
-      engineering and the response generation.
-
-2. Low-level API, allowing advanced users to implement their own complex pipelines:
-    - Embeddings generation: based on a piece of text.
-    - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested
-      documents.
-
 <Callout intent = "info">
 A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
 model download script, ingestion script, documents folder watch, etc.
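The low-level block removed above (and moved to the new API Reference page) is easiest to see with the chunks-retrieval call. A hedged sketch against the same assumed server; the text/limit request fields and the data/document/score response fields are my reading of the /v1/chunks schema, not guaranteed:

import requests

# Low-level block: retrieve the most relevant chunks for a query,
# without invoking the LLM at all.
resp = requests.post(
    "http://localhost:8001/v1/chunks",
    json={"text": "What is 'bending'?", "limit": 2},
)
for chunk in resp.json()["data"]:
    # each chunk carries its source document id, a relevance score and the text
    print(chunk["document"]["doc_id"], chunk["score"], chunk["text"][:80])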
@@ -137,7 +137,7 @@
           "Ingestion"
         ],
         "summary": "Ingest",
-        "description": "Ingests and processes a file, storing its chunks to be used as context.\n\nThe context obtained from files is later used in\n`/chat/completions`, `/completions`, and `/chunks` APIs.\n\nMost common document\nformats are supported, but you may be prompted to install an extra dependency to\nmanage a specific file type.\n\nA file can generate different Documents (for example a PDF generates one Document\nper page). All Documents IDs are returned in the response, together with the\nextracted Metadata (which is later used to improve context retrieval). Those IDs\ncan be used to filter the context used to create responses in\n`/chat/completions`, `/completions`, and `/chunks` APIs.",
+        "description": "Ingests and processes a file.\n\nDeprecated. Use ingest/file instead.",
         "operationId": "ingest_v1_ingest_post",
         "requestBody": {
           "content": {
@@ -149,6 +149,91 @@
           },
           "required": true
         },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/IngestResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "deprecated": true
+      }
+    },
+    "/v1/ingest/file": {
+      "post": {
+        "tags": [
+          "Ingestion"
+        ],
+        "summary": "Ingest File",
+        "description": "Ingests and processes a file, storing its chunks to be used as context.\n\nThe context obtained from files is later used in\n`/chat/completions`, `/completions`, and `/chunks` APIs.\n\nMost common document\nformats are supported, but you may be prompted to install an extra dependency to\nmanage a specific file type.\n\nA file can generate different Documents (for example a PDF generates one Document\nper page). All Documents IDs are returned in the response, together with the\nextracted Metadata (which is later used to improve context retrieval). Those IDs\ncan be used to filter the context used to create responses in\n`/chat/completions`, `/completions`, and `/chunks` APIs.",
+        "operationId": "ingest_file_v1_ingest_file_post",
+        "requestBody": {
+          "content": {
+            "multipart/form-data": {
+              "schema": {
+                "$ref": "#/components/schemas/Body_ingest_file_v1_ingest_file_post"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/IngestResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/ingest/text": {
+      "post": {
+        "tags": [
+          "Ingestion"
+        ],
+        "summary": "Ingest Text",
+        "description": "Ingests and processes a text, storing its chunks to be used as context.\n\nThe context obtained from files is later used in\n`/chat/completions`, `/completions`, and `/chunks` APIs.\n\nA Document will be generated with the given text. The Document\nID is returned in the response, together with the\nextracted Metadata (which is later used to improve context retrieval). That ID\ncan be used to filter the context used to create responses in\n`/chat/completions`, `/completions`, and `/chunks` APIs.",
+        "operationId": "ingest_text_v1_ingest_text_post",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/IngestTextBody"
+              }
+            }
+          },
+          "required": true
+        },
         "responses": {
           "200": {
             "description": "Successful Response",
@@ -303,6 +388,20 @@
   },
   "components": {
     "schemas": {
+      "Body_ingest_file_v1_ingest_file_post": {
+        "properties": {
+          "file": {
+            "type": "string",
+            "format": "binary",
+            "title": "File"
+          }
+        },
+        "type": "object",
+        "required": [
+          "file"
+        ],
+        "title": "Body_ingest_file_v1_ingest_file_post"
+      },
       "Body_ingest_v1_ingest_post": {
         "properties": {
           "file": {
@@ -735,6 +834,30 @@
         ],
         "title": "IngestResponse"
       },
+      "IngestTextBody": {
+        "properties": {
+          "file_name": {
+            "type": "string",
+            "title": "File Name",
+            "examples": [
+              "Avatar: The Last Airbender"
+            ]
+          },
+          "text": {
+            "type": "string",
+            "title": "Text",
+            "examples": [
+              "Avatar is set in an Asian and Arctic-inspired world in which some people can telekinetically manipulate one of the four elements\u2014water, earth, fire or air\u2014through practices known as 'bending', inspired by Chinese martial arts."
+            ]
+          }
+        },
+        "type": "object",
+        "required": [
+          "file_name",
+          "text"
+        ],
+        "title": "IngestTextBody"
+      },
       "IngestedDoc": {
         "properties": {
           "object": {
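The spec above defines two request shapes: multipart/form-data for /v1/ingest/file (schema Body_ingest_file_v1_ingest_file_post) and a JSON body for /v1/ingest/text (schema IngestTextBody). A minimal sketch of both calls, assuming a server on localhost:8001 and the requests package:

import requests

BASE_URL = "http://localhost:8001"  # assumed

# /v1/ingest/file expects multipart/form-data with a single "file" part.
requests.post(
    f"{BASE_URL}/v1/ingest/file",
    files={"file": ("notes.md", b"# Notes\nSome text to index.", "text/markdown")},
)

# /v1/ingest/text expects JSON matching IngestTextBody: file_name + text.
requests.post(
    f"{BASE_URL}/v1/ingest/text",
    json={
        "file_name": "Avatar: The Last Airbender",
        "text": "Avatar is set in an Asian and Arctic-inspired world ...",
    },
)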
poetry.lock: 2632 lines changed (generated file). File diff suppressed because it is too large.
@@ -1,7 +1,7 @@
 from typing import Literal

 from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
-from pydantic import BaseModel
+from pydantic import BaseModel, Field

 from private_gpt.server.ingest.ingest_service import IngestService
 from private_gpt.server.ingest.model import IngestedDoc
@@ -10,14 +10,35 @@ from private_gpt.server.utils.auth import authenticated
 ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])


+class IngestTextBody(BaseModel):
+    file_name: str = Field(examples=["Avatar: The Last Airbender"])
+    text: str = Field(
+        examples=[
+            "Avatar is set in an Asian and Arctic-inspired world in which some "
+            "people can telekinetically manipulate one of the four elements—water, "
+            "earth, fire or air—through practices known as 'bending', inspired by "
+            "Chinese martial arts."
+        ]
+    )
+
+
 class IngestResponse(BaseModel):
     object: Literal["list"]
     model: Literal["private-gpt"]
     data: list[IngestedDoc]


-@ingest_router.post("/ingest", tags=["Ingestion"])
+@ingest_router.post("/ingest", tags=["Ingestion"], deprecated=True)
 def ingest(request: Request, file: UploadFile) -> IngestResponse:
+    """Ingests and processes a file.
+
+    Deprecated. Use ingest/file instead.
+    """
+    return ingest_file(request, file)
+
+
+@ingest_router.post("/ingest/file", tags=["Ingestion"])
+def ingest_file(request: Request, file: UploadFile) -> IngestResponse:
     """Ingests and processes a file, storing its chunks to be used as context.

     The context obtained from files is later used in
@@ -40,6 +61,26 @@ def ingest(request: Request, file: UploadFile) -> IngestResponse:
     return IngestResponse(object="list", model="private-gpt", data=ingested_documents)


+@ingest_router.post("/ingest/text", tags=["Ingestion"])
+def ingest_text(request: Request, body: IngestTextBody) -> IngestResponse:
+    """Ingests and processes a text, storing its chunks to be used as context.
+
+    The context obtained from files is later used in
+    `/chat/completions`, `/completions`, and `/chunks` APIs.
+
+    A Document will be generated with the given text. The Document
+    ID is returned in the response, together with the
+    extracted Metadata (which is later used to improve context retrieval). That ID
+    can be used to filter the context used to create responses in
+    `/chat/completions`, `/completions`, and `/chunks` APIs.
+    """
+    service = request.state.injector.get(IngestService)
+    if len(body.file_name) == 0:
+        raise HTTPException(400, "No file name provided")
+    ingested_documents = service.ingest_text(body.file_name, body.text)
+    return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
+
+
 @ingest_router.get("/ingest/list", tags=["Ingestion"])
 def list_ingested(request: Request) -> IngestResponse:
     """Lists already ingested Documents including their Document ID and metadata.
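The router above keeps /v1/ingest serving as a thin alias while flagging it deprecated in the generated OpenAPI spec. A self-contained sketch of that pattern with generic route names (not the project's actual module):

from fastapi import FastAPI

app = FastAPI()


@app.post("/items/upload")
def upload_item() -> dict:
    # the real implementation lives on the new route
    return {"status": "ok"}


@app.post("/items", deprecated=True)
def upload_item_legacy() -> dict:
    """Deprecated. Use /items/upload instead."""
    # the old route simply delegates, so existing clients keep working
    return upload_item()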
@@ -1,7 +1,7 @@
 import logging
 import tempfile
 from pathlib import Path
-from typing import BinaryIO
+from typing import AnyStr, BinaryIO

 from injector import inject, singleton
 from llama_index import (
@@ -53,16 +53,7 @@ class IngestService:
             self.storage_context, self.ingest_service_context, settings=settings()
         )

-    def ingest(self, file_name: str, file_data: Path) -> list[IngestedDoc]:
-        logger.info("Ingesting file_name=%s", file_name)
-        documents = self.ingest_component.ingest(file_name, file_data)
-        return [IngestedDoc.from_document(document) for document in documents]
-
-    def ingest_bin_data(
-        self, file_name: str, raw_file_data: BinaryIO
-    ) -> list[IngestedDoc]:
-        logger.debug("Ingesting binary data with file_name=%s", file_name)
-        file_data = raw_file_data.read()
+    def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
         logger.debug("Got file data of size=%s to ingest", len(file_data))
         # llama-index mainly supports reading from files, so
         # we have to create a tmp file to read for it to work
@@ -74,11 +65,27 @@ class IngestService:
                 path_to_tmp.write_bytes(file_data)
             else:
                 path_to_tmp.write_text(str(file_data))
-            return self.ingest(file_name, path_to_tmp)
+            return self.ingest_file(file_name, path_to_tmp)
         finally:
             tmp.close()
             path_to_tmp.unlink()

+    def ingest_file(self, file_name: str, file_data: Path) -> list[IngestedDoc]:
+        logger.info("Ingesting file_name=%s", file_name)
+        documents = self.ingest_component.ingest(file_name, file_data)
+        return [IngestedDoc.from_document(document) for document in documents]
+
+    def ingest_text(self, file_name: str, text: str) -> list[IngestedDoc]:
+        logger.debug("Ingesting text data with file_name=%s", file_name)
+        return self._ingest_data(file_name, text)
+
+    def ingest_bin_data(
+        self, file_name: str, raw_file_data: BinaryIO
+    ) -> list[IngestedDoc]:
+        logger.debug("Ingesting binary data with file_name=%s", file_name)
+        file_data = raw_file_data.read()
+        return self._ingest_data(file_name, file_data)
+
     def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]:
         logger.info("Ingesting file_names=%s", [f[0] for f in files])
         documents = self.ingest_component.bulk_ingest(files)
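_ingest_data above funnels both str and bytes payloads through a temporary file because the underlying llama-index readers are file-oriented. A stripped-down sketch of the same pattern using only the standard library; via_tmp_file and read_file are hypothetical names, and read_file merely stands in for the real document reader:

import tempfile
from pathlib import Path
from typing import AnyStr


def via_tmp_file(file_data: AnyStr) -> str:
    tmp = tempfile.NamedTemporaryFile(delete=False)
    path_to_tmp = Path(tmp.name)
    try:
        # persist whichever payload type we got, then hand a real
        # filesystem path to the file-oriented reader
        if isinstance(file_data, bytes):
            path_to_tmp.write_bytes(file_data)
        else:
            path_to_tmp.write_text(str(file_data))
        return read_file(path_to_tmp)
    finally:
        tmp.close()
        path_to_tmp.unlink()


def read_file(path: Path) -> str:
    # hypothetical stand-in for the llama-index reader
    return path.read_text(errors="replace")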
@@ -219,13 +219,17 @@ class PrivateGptUi:
             "justify-content: center;"
             "align-items: center;"
             "}"
-            ".logo img { height: 25% }",
+            ".logo img { height: 25% }"
+            ".contain { display: flex !important; flex-direction: column !important; }"
+            "#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }"
+            "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
+            "#col { height: calc(100vh - 112px - 16px) !important; }",
         ) as blocks:
             with gr.Row():
                 gr.HTML(f"<div class='logo'/><img src={logo_svg} alt=PrivateGPT></div")

-            with gr.Row():
-                with gr.Column(scale=3, variant="compact"):
+            with gr.Row(equal_height=False):
+                with gr.Column(scale=3):
                     mode = gr.Radio(
                         MODES,
                         label="Mode",
@@ -271,12 +275,13 @@ class PrivateGptUi:
                         inputs=system_prompt_input,
                     )

-                with gr.Column(scale=7):
+                with gr.Column(scale=7, elem_id="col"):
                     _ = gr.ChatInterface(
                         self._chat,
                         chatbot=gr.Chatbot(
                             label=f"LLM: {settings().llm.mode}",
                             show_copy_button=True,
+                            elem_id="chatbot",
                             render=False,
                             avatar_images=(
                                 None,
@@ -36,7 +36,7 @@ gradio = "^4.4.1"
 [tool.poetry.group.local]
 optional = true
 [tool.poetry.group.local.dependencies]
-llama-cpp-python = "^0.2.11"
+llama-cpp-python = "^0.2.23"
 numpy = "1.26.0"
 sentence-transformers = "^2.2.2"
 # https://stackoverflow.com/questions/76327419/valueerror-libcublas-so-0-9-not-found-in-the-system-path
@@ -48,7 +48,7 @@ class LocalIngestWorker:
         try:
             if changed_path.exists():
                 logger.info(f"Started ingesting file={changed_path}")
-                self.ingest_service.ingest(changed_path.name, changed_path)
+                self.ingest_service.ingest_file(changed_path.name, changed_path)
                 logger.info(f"Completed ingesting file={changed_path}")
         except Exception:
             logger.exception(
@@ -48,8 +48,8 @@ qdrant:

 local:
   prompt_style: "llama2"
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
+  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
+  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   embedding_hf_model_name: BAAI/bge-small-en-v1.5

 sagemaker:
tests/fixtures/ingest_helper.py: 2 lines changed (vendored).
@@ -13,7 +13,7 @@ class IngestHelper:
     def ingest_file(self, path: Path) -> IngestResponse:
         files = {"file": (path.name, path.open("rb"))}

-        response = self.test_client.post("/v1/ingest", files=files)
+        response = self.test_client.post("/v1/ingest/file", files=files)
         assert response.status_code == 200
         ingest_result = IngestResponse.model_validate(response.json())
         return ingest_result
@@ -3,6 +3,7 @@ from pathlib import Path

 from fastapi.testclient import TestClient

+from private_gpt.server.ingest.ingest_router import IngestResponse
 from tests.fixtures.ingest_helper import IngestHelper


@@ -34,3 +35,12 @@ def test_ingest_list_returns_something_after_ingestion(
     assert (
         count_ingest_after == count_ingest_before + 1
     ), "The temp doc should be returned"
+
+
+def test_ingest_plain_text(test_client: TestClient) -> None:
+    response = test_client.post(
+        "/v1/ingest/text", json={"file_name": "file_name", "text": "text"}
+    )
+    assert response.status_code == 200
+    ingest_result = IngestResponse.model_validate(response.json())
+    assert len(ingest_result.data) == 1