diff --git a/poetry.lock b/poetry.lock index 8466ddbf..dfdafd2f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -1273,13 +1273,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "gradio" -version = "4.10.0" +version = "4.19.0" description = "Python library for easily interacting with trained machine learning models" optional = false python-versions = ">=3.8" files = [ - {file = "gradio-4.10.0-py3-none-any.whl", hash = "sha256:7595185716aff430381d010087d6ebc4eadef06fefc3dc1cfa76edcdd2c109db"}, - {file = "gradio-4.10.0.tar.gz", hash = "sha256:d4ca039aa7f5c2783b2bbf7b465153c80bb4257edcca4d8b9c59ce6f61a75b97"}, + {file = "gradio-4.19.0-py3-none-any.whl", hash = "sha256:d09732190acc0f33b5e7ea3235d267472bf74beeea62dabb7a82f93193155e09"}, + {file = "gradio-4.19.0.tar.gz", hash = "sha256:e77e3ce8a4113865abd1dcf92cc9426d9da4896e0a6fd2824a0c90ec751dd442"}, ] [package.dependencies] @@ -1287,7 +1287,7 @@ aiofiles = ">=22.0,<24.0" altair = ">=4.2.0,<6.0" fastapi = "*" ffmpy = "*" -gradio-client = "0.7.3" +gradio-client = "0.10.0" httpx = "*" huggingface-hub = ">=0.19.3" importlib-resources = ">=1.3,<7.0" @@ -1303,6 +1303,7 @@ pydantic = ">=2.0" pydub = "*" python-multipart = "*" pyyaml = ">=5.0,<7.0" +ruff = ">=0.1.7" semantic-version = ">=2.0,<3.0" tomlkit = "0.12.0" typer = {version = ">=0.9,<1.0", extras = ["all"]} @@ -1314,13 +1315,13 @@ oauth = ["authlib", "itsdangerous"] [[package]] name = "gradio-client" -version = "0.7.3" +version = "0.10.0" description = "Python library for easily interacting with trained machine learning models" optional = false python-versions = ">=3.8" files = [ - {file = "gradio_client-0.7.3-py3-none-any.whl", hash = "sha256:b91073770470ceb9f284977064c35bc0cffaf868eb887bf352db77aa01fe342a"}, - {file = "gradio_client-0.7.3.tar.gz", hash = "sha256:8146a1d19a125b38088dd201ddacd0008ea47ef9b0504d1c5b87ca09a43f4dcd"}, + {file = "gradio_client-0.10.0-py3-none-any.whl", hash = "sha256:2bcfe61710f9f1c8f336fa9ff0f5c5f0ea52079233196cd753ad30cccdfd585c"}, + {file = "gradio_client-0.10.0.tar.gz", hash = "sha256:feaee70f18363d76f81a7d25fc3456f40ed5f92417e642c8f1bf86dc65e3a981"}, ] [package.dependencies] @@ -6111,4 +6112,4 @@ chroma = ["chromadb"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "c2bcf29b5c894a0fae9682145cd001dfb57bb4919c9097b5e27323ddee58fc8c" +content-hash = "121bf7797b74c02efaf11712e178c9c01880b79701eeff6485ede9ca8b25d307" diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index ed65c203..8e022949 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -189,6 +189,12 @@ class UISettings(BaseModel): default_query_system_prompt: str = Field( None, description="The default system prompt to use for the query mode." ) + delete_file_button_enabled: bool = Field( + True, description="If the button to delete a file is enabled or not." + ) + delete_all_files_button_enabled: bool = Field( + False, description="If the button to delete all files is enabled or not." + ) class QdrantSettings(BaseModel): diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index c7b538a3..a4b131fe 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -15,6 +15,7 @@ from pydantic import BaseModel from private_gpt.constants import PROJECT_ROOT_PATH from private_gpt.di import global_injector +from private_gpt.open_ai.extensions.context_filter import ContextFilter from private_gpt.server.chat.chat_service import ChatService, CompletionGen from private_gpt.server.chunks.chunks_service import Chunk, ChunksService from private_gpt.server.ingest.ingest_service import IngestService @@ -31,7 +32,7 @@ UI_TAB_TITLE = "My Private GPT" SOURCES_SEPARATOR = "\n\n Sources: \n" -MODES = ["Query Docs", "Search in Docs", "LLM Chat"] +MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"] class Source(BaseModel): @@ -74,6 +75,8 @@ class PrivateGptUi: # Cache the UI blocks self._ui_block = None + self._selected_filename = None + # Initialize system prompt based on default mode self.mode = MODES[0] self._system_prompt = self._get_default_system_prompt(self.mode) @@ -132,20 +135,34 @@ class PrivateGptUi: ), ) match mode: - case "Query Docs": + case "Query Files": + + # Use only the selected file for the query + context_filter = None + if self._selected_filename is not None: + docs_ids = [] + for ingested_document in self._ingest_service.list_ingested(): + if ( + ingested_document.doc_metadata["file_name"] + == self._selected_filename + ): + docs_ids.append(ingested_document.doc_id) + context_filter = ContextFilter(docs_ids=docs_ids) + query_stream = self._chat_service.stream_chat( messages=all_messages, use_context=True, + context_filter=context_filter, ) yield from yield_deltas(query_stream) - case "LLM Chat": + case "LLM Chat (no context from files)": llm_stream = self._chat_service.stream_chat( messages=all_messages, use_context=False, ) yield from yield_deltas(llm_stream) - case "Search in Docs": + case "Search Files": response = self._chunks_service.retrieve_relevant( text=message, limit=4, prev_next_chunks=0 ) @@ -166,10 +183,10 @@ class PrivateGptUi: p = "" match mode: # For query chat mode, obtain default system prompt from settings - case "Query Docs": + case "Query Files": p = settings().ui.default_query_system_prompt # For chat mode, obtain default system prompt from settings - case "LLM Chat": + case "LLM Chat (no context from files)": p = settings().ui.default_chat_system_prompt # For any other mode, clear the system prompt case _: @@ -205,8 +222,71 @@ class PrivateGptUi: def _upload_file(self, files: list[str]) -> None: logger.debug("Loading count=%s files", len(files)) paths = [Path(file) for file in files] + + # remove all existing Documents with name identical to a new file upload: + file_names = [path.name for path in paths] + doc_ids_to_delete = [] + for ingested_document in self._ingest_service.list_ingested(): + if ( + ingested_document.doc_metadata + and ingested_document.doc_metadata["file_name"] in file_names + ): + doc_ids_to_delete.append(ingested_document.doc_id) + if len(doc_ids_to_delete) > 0: + logger.info( + "Uploading file(s) which were already ingested: %s document(s) will be replaced.", + len(doc_ids_to_delete), + ) + for doc_id in doc_ids_to_delete: + self._ingest_service.delete(doc_id) + self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths]) + def _delete_all_files(self) -> Any: + ingested_files = self._ingest_service.list_ingested() + logger.debug("Deleting count=%s files", len(ingested_files)) + for ingested_document in ingested_files: + self._ingest_service.delete(ingested_document.doc_id) + return [ + gr.List(self._list_ingested_files()), + gr.components.Button(interactive=False), + gr.components.Button(interactive=False), + gr.components.Textbox("All files"), + ] + + def _delete_selected_file(self) -> Any: + logger.debug("Deleting selected %s", self._selected_filename) + # Note: keep looping for pdf's (each page became a Document) + for ingested_document in self._ingest_service.list_ingested(): + if ( + ingested_document.doc_metadata + and ingested_document.doc_metadata["file_name"] + == self._selected_filename + ): + self._ingest_service.delete(ingested_document.doc_id) + return [ + gr.List(self._list_ingested_files()), + gr.components.Button(interactive=False), + gr.components.Button(interactive=False), + gr.components.Textbox("All files"), + ] + + def _deselect_selected_file(self) -> Any: + self._selected_filename = None + return [ + gr.components.Button(interactive=False), + gr.components.Button(interactive=False), + gr.components.Textbox("All files"), + ] + + def _selected_a_file(self, select_data: gr.SelectData) -> Any: + self._selected_filename = select_data.value + return [ + gr.components.Button(interactive=True), + gr.components.Button(interactive=True), + gr.components.Textbox(self._selected_filename), + ] + def _build_ui_blocks(self) -> gr.Blocks: logger.debug("Creating the UI blocks") with gr.Blocks( @@ -235,7 +315,7 @@ class PrivateGptUi: mode = gr.Radio( MODES, label="Mode", - value="Query Docs", + value="Query Files", ) upload_button = gr.components.UploadButton( "Upload File(s)", @@ -247,6 +327,7 @@ class PrivateGptUi: self._list_ingested_files, headers=["File name"], label="Ingested Files", + height=235, interactive=False, render=False, # Rendered under the button ) @@ -260,6 +341,57 @@ class PrivateGptUi: outputs=ingested_dataset, ) ingested_dataset.render() + deselect_file_button = gr.components.Button( + "De-select selected file", size="sm", interactive=False + ) + selected_text = gr.components.Textbox( + "All files", label="Selected for Query or Deletion", max_lines=1 + ) + delete_file_button = gr.components.Button( + "🗑️ Delete selected file", + size="sm", + visible=settings().ui.delete_file_button_enabled, + interactive=False, + ) + delete_files_button = gr.components.Button( + "⚠️ Delete ALL files", + size="sm", + visible=settings().ui.delete_all_files_button_enabled, + ) + deselect_file_button.click( + self._deselect_selected_file, + outputs=[ + delete_file_button, + deselect_file_button, + selected_text, + ], + ) + ingested_dataset.select( + fn=self._selected_a_file, + outputs=[ + delete_file_button, + deselect_file_button, + selected_text, + ], + ) + delete_file_button.click( + self._delete_selected_file, + outputs=[ + ingested_dataset, + delete_file_button, + deselect_file_button, + selected_text, + ], + ) + delete_files_button.click( + self._delete_all_files, + outputs=[ + ingested_dataset, + delete_file_button, + deselect_file_button, + selected_text, + ], + ) system_prompt_input = gr.Textbox( placeholder=self._system_prompt, label="System Prompt", diff --git a/pyproject.toml b/pyproject.toml index e75a7cb9..97db9986 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ types-pyyaml = "^6.0.12.12" [tool.poetry.group.ui] optional = true [tool.poetry.group.ui.dependencies] -gradio = "^4.4.1" +gradio = "^4.19.0" [tool.poetry.group.local] optional = true diff --git a/scripts/ingest_folder.py b/scripts/ingest_folder.py index 8c6acad1..ccda87cc 100755 --- a/scripts/ingest_folder.py +++ b/scripts/ingest_folder.py @@ -18,10 +18,11 @@ class LocalIngestWorker: self.total_documents = 0 self.current_document_count = 0 - self._files_under_root_folder: list[Path] = list() + self._files_under_root_folder: list[Path] = [] def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None: """Search all files under the root folder recursively. + Count them at the same time """ for file_path in root_path.iterdir(): diff --git a/settings.yaml b/settings.yaml index 0ffbfcae..632c12ce 100644 --- a/settings.yaml +++ b/settings.yaml @@ -31,6 +31,9 @@ ui: You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided. + delete_file_button_enabled: true + delete_all_files_button_enabled: true + llm: mode: local