feat(UI): Select file to Query or Delete + Delete ALL (#1612)

---------

Co-authored-by: Robin Boone <rboone@sofics.com>
This commit is contained in:
Iván Martínez 2024-02-16 17:36:09 +01:00 committed by GitHub
parent 24fb80ca38
commit aa13afde07
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 161 additions and 18 deletions

19
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "accelerate"
@@ -1273,13 +1273,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
[[package]]
name = "gradio"
version = "4.10.0"
version = "4.19.0"
description = "Python library for easily interacting with trained machine learning models"
optional = false
python-versions = ">=3.8"
files = [
{file = "gradio-4.10.0-py3-none-any.whl", hash = "sha256:7595185716aff430381d010087d6ebc4eadef06fefc3dc1cfa76edcdd2c109db"},
{file = "gradio-4.10.0.tar.gz", hash = "sha256:d4ca039aa7f5c2783b2bbf7b465153c80bb4257edcca4d8b9c59ce6f61a75b97"},
{file = "gradio-4.19.0-py3-none-any.whl", hash = "sha256:d09732190acc0f33b5e7ea3235d267472bf74beeea62dabb7a82f93193155e09"},
{file = "gradio-4.19.0.tar.gz", hash = "sha256:e77e3ce8a4113865abd1dcf92cc9426d9da4896e0a6fd2824a0c90ec751dd442"},
]
[package.dependencies]
@@ -1287,7 +1287,7 @@ aiofiles = ">=22.0,<24.0"
altair = ">=4.2.0,<6.0"
fastapi = "*"
ffmpy = "*"
gradio-client = "0.7.3"
gradio-client = "0.10.0"
httpx = "*"
huggingface-hub = ">=0.19.3"
importlib-resources = ">=1.3,<7.0"
@@ -1303,6 +1303,7 @@ pydantic = ">=2.0"
pydub = "*"
python-multipart = "*"
pyyaml = ">=5.0,<7.0"
ruff = ">=0.1.7"
semantic-version = ">=2.0,<3.0"
tomlkit = "0.12.0"
typer = {version = ">=0.9,<1.0", extras = ["all"]}
@@ -1314,13 +1315,13 @@ oauth = ["authlib", "itsdangerous"]
[[package]]
name = "gradio-client"
version = "0.7.3"
version = "0.10.0"
description = "Python library for easily interacting with trained machine learning models"
optional = false
python-versions = ">=3.8"
files = [
{file = "gradio_client-0.7.3-py3-none-any.whl", hash = "sha256:b91073770470ceb9f284977064c35bc0cffaf868eb887bf352db77aa01fe342a"},
{file = "gradio_client-0.7.3.tar.gz", hash = "sha256:8146a1d19a125b38088dd201ddacd0008ea47ef9b0504d1c5b87ca09a43f4dcd"},
{file = "gradio_client-0.10.0-py3-none-any.whl", hash = "sha256:2bcfe61710f9f1c8f336fa9ff0f5c5f0ea52079233196cd753ad30cccdfd585c"},
{file = "gradio_client-0.10.0.tar.gz", hash = "sha256:feaee70f18363d76f81a7d25fc3456f40ed5f92417e642c8f1bf86dc65e3a981"},
]
[package.dependencies]
@@ -6111,4 +6112,4 @@ chroma = ["chromadb"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
content-hash = "c2bcf29b5c894a0fae9682145cd001dfb57bb4919c9097b5e27323ddee58fc8c"
content-hash = "121bf7797b74c02efaf11712e178c9c01880b79701eeff6485ede9ca8b25d307"

View File

@@ -189,6 +189,12 @@ class UISettings(BaseModel):
default_query_system_prompt: str = Field(
None, description="The default system prompt to use for the query mode."
)
delete_file_button_enabled: bool = Field(
True, description="If the button to delete a file is enabled or not."
)
delete_all_files_button_enabled: bool = Field(
False, description="If the button to delete all files is enabled or not."
)
class QdrantSettings(BaseModel):

View File

@@ -15,6 +15,7 @@ from pydantic import BaseModel
from private_gpt.constants import PROJECT_ROOT_PATH
from private_gpt.di import global_injector
from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.server.chat.chat_service import ChatService, CompletionGen
from private_gpt.server.chunks.chunks_service import Chunk, ChunksService
from private_gpt.server.ingest.ingest_service import IngestService
@@ -31,7 +32,7 @@ UI_TAB_TITLE = "My Private GPT"
SOURCES_SEPARATOR = "\n\n Sources: \n"
MODES = ["Query Docs", "Search in Docs", "LLM Chat"]
MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"]
class Source(BaseModel):
@@ -74,6 +75,8 @@ class PrivateGptUi:
# Cache the UI blocks
self._ui_block = None
self._selected_filename = None
# Initialize system prompt based on default mode
self.mode = MODES[0]
self._system_prompt = self._get_default_system_prompt(self.mode)
@@ -132,20 +135,34 @@
),
)
match mode:
case "Query Docs":
case "Query Files":
# Use only the selected file for the query
context_filter = None
if self._selected_filename is not None:
docs_ids = []
for ingested_document in self._ingest_service.list_ingested():
if (
ingested_document.doc_metadata["file_name"]
== self._selected_filename
):
docs_ids.append(ingested_document.doc_id)
context_filter = ContextFilter(docs_ids=docs_ids)
query_stream = self._chat_service.stream_chat(
messages=all_messages,
use_context=True,
context_filter=context_filter,
)
yield from yield_deltas(query_stream)
case "LLM Chat":
case "LLM Chat (no context from files)":
llm_stream = self._chat_service.stream_chat(
messages=all_messages,
use_context=False,
)
yield from yield_deltas(llm_stream)
case "Search in Docs":
case "Search Files":
response = self._chunks_service.retrieve_relevant(
text=message, limit=4, prev_next_chunks=0
)
@@ -166,10 +183,10 @@
p = ""
match mode:
# For query chat mode, obtain default system prompt from settings
case "Query Docs":
case "Query Files":
p = settings().ui.default_query_system_prompt
# For chat mode, obtain default system prompt from settings
case "LLM Chat":
case "LLM Chat (no context from files)":
p = settings().ui.default_chat_system_prompt
# For any other mode, clear the system prompt
case _:
@@ -205,8 +222,71 @@
def _upload_file(self, files: list[str]) -> None:
    """Ingest the uploaded files, replacing any previously ingested documents
    that share a file name with one of the uploads.

    :param files: local filesystem paths of the files Gradio saved on upload.
    """
    logger.debug("Loading count=%s files", len(files))
    paths = [Path(file) for file in files]

    # remove all existing Documents with name identical to a new file upload:
    file_names = [path.name for path in paths]
    doc_ids_to_delete = []
    # One uploaded file may map to several ingested documents (e.g. one per
    # PDF page), so collect every doc_id whose file_name matches an upload.
    for ingested_document in self._ingest_service.list_ingested():
        if (
            ingested_document.doc_metadata
            and ingested_document.doc_metadata["file_name"] in file_names
        ):
            doc_ids_to_delete.append(ingested_document.doc_id)
    if len(doc_ids_to_delete) > 0:
        logger.info(
            "Uploading file(s) which were already ingested: %s document(s) will be replaced.",
            len(doc_ids_to_delete),
        )
        # Delete before re-ingesting so the new upload fully replaces the old.
        for doc_id in doc_ids_to_delete:
            self._ingest_service.delete(doc_id)
    self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths])
def _delete_all_files(self) -> Any:
    """Delete every ingested document and reset the file-selection widgets.

    Returns the Gradio component updates wired to this handler: the refreshed
    file list, both selection-dependent buttons disabled, and the selection
    textbox reset to its "All files" placeholder.
    """
    all_ingested = self._ingest_service.list_ingested()
    logger.debug("Deleting count=%s files", len(all_ingested))
    for document in all_ingested:
        self._ingest_service.delete(document.doc_id)
    # Build the UI updates one by one; two distinct Button instances are
    # required, one per output slot.
    updates = [gr.List(self._list_ingested_files())]
    updates.append(gr.components.Button(interactive=False))
    updates.append(gr.components.Button(interactive=False))
    updates.append(gr.components.Textbox("All files"))
    return updates
def _delete_selected_file(self) -> Any:
    """Delete every ingested document whose file name matches the current
    selection, then refresh the file list and disable the file buttons.
    """
    logger.debug("Deleting selected %s", self._selected_filename)
    # Note: keep looping for pdf's (each page became a Document)
    for document in self._ingest_service.list_ingested():
        metadata = document.doc_metadata
        if metadata and metadata["file_name"] == self._selected_filename:
            self._ingest_service.delete(document.doc_id)
    # Same output shape as _delete_all_files: list, two buttons, textbox.
    return [
        gr.List(self._list_ingested_files()),
        gr.components.Button(interactive=False),
        gr.components.Button(interactive=False),
        gr.components.Textbox("All files"),
    ]
def _deselect_selected_file(self) -> Any:
    """Clear the file selection and disable the selection-dependent buttons."""
    self._selected_filename = None
    # Two separate Button instances, one per wired output slot.
    disabled_buttons = [gr.components.Button(interactive=False) for _ in range(2)]
    return [*disabled_buttons, gr.components.Textbox("All files")]
def _selected_a_file(self, select_data: gr.SelectData) -> Any:
    """Remember the clicked file name and enable the per-file buttons.

    :param select_data: Gradio select event; its ``value`` is the file name.
    """
    self._selected_filename = select_data.value
    # Two separate Button instances, one per wired output slot.
    enabled_buttons = [gr.components.Button(interactive=True) for _ in range(2)]
    return [*enabled_buttons, gr.components.Textbox(self._selected_filename)]
def _build_ui_blocks(self) -> gr.Blocks:
logger.debug("Creating the UI blocks")
with gr.Blocks(
@@ -235,7 +315,7 @@
mode = gr.Radio(
MODES,
label="Mode",
value="Query Docs",
value="Query Files",
)
upload_button = gr.components.UploadButton(
"Upload File(s)",
@@ -247,6 +327,7 @@
self._list_ingested_files,
headers=["File name"],
label="Ingested Files",
height=235,
interactive=False,
render=False, # Rendered under the button
)
@@ -260,6 +341,57 @@
outputs=ingested_dataset,
)
ingested_dataset.render()
deselect_file_button = gr.components.Button(
"De-select selected file", size="sm", interactive=False
)
selected_text = gr.components.Textbox(
"All files", label="Selected for Query or Deletion", max_lines=1
)
delete_file_button = gr.components.Button(
"🗑️ Delete selected file",
size="sm",
visible=settings().ui.delete_file_button_enabled,
interactive=False,
)
delete_files_button = gr.components.Button(
"⚠️ Delete ALL files",
size="sm",
visible=settings().ui.delete_all_files_button_enabled,
)
deselect_file_button.click(
self._deselect_selected_file,
outputs=[
delete_file_button,
deselect_file_button,
selected_text,
],
)
ingested_dataset.select(
fn=self._selected_a_file,
outputs=[
delete_file_button,
deselect_file_button,
selected_text,
],
)
delete_file_button.click(
self._delete_selected_file,
outputs=[
ingested_dataset,
delete_file_button,
deselect_file_button,
selected_text,
],
)
delete_files_button.click(
self._delete_all_files,
outputs=[
ingested_dataset,
delete_file_button,
deselect_file_button,
selected_text,
],
)
system_prompt_input = gr.Textbox(
placeholder=self._system_prompt,
label="System Prompt",

View File

@@ -31,7 +31,7 @@ types-pyyaml = "^6.0.12.12"
[tool.poetry.group.ui]
optional = true
[tool.poetry.group.ui.dependencies]
gradio = "^4.4.1"
gradio = "^4.19.0"
[tool.poetry.group.local]
optional = true

View File

@@ -18,10 +18,11 @@ class LocalIngestWorker:
self.total_documents = 0
self.current_document_count = 0
self._files_under_root_folder: list[Path] = list()
self._files_under_root_folder: list[Path] = []
def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None:
"""Search all files under the root folder recursively.
Count them at the same time
"""
for file_path in root_path.iterdir():

View File

@@ -31,6 +31,9 @@ ui:
You can only answer questions about the provided context.
If you know the answer but it is not based in the provided context, don't provide
the answer, just state the answer is not in the context provided.
delete_file_button_enabled: true
delete_all_files_button_enabled: true
llm:
mode: local