Mirror of https://github.com/imartinez/privateGPT.git (synced 2025-06-25 06:52:57 +00:00)

Search in Docs to UI (#1186)

Move from Context Chunks JSON response to a more comprehensive Search in Docs functionality.

parent 1e96e3a29e
commit c81f4b2ebd
@@ -348,25 +348,24 @@ computations.
 
 Gradio UI is a ready to use way of testing most of PrivateGPT API functionalities.
 
 
 ### Execution Modes
 
 It has 3 modes of execution (you can select in the top-left):
 
-* Query Documents: uses the context from the
+* Query Docs: uses the context from the
   ingested documents to answer the questions posted in the chat. It also takes
   into account previous chat messages as context.
   * Makes use of `/chat/completions` API with `use_context=true` and no
     `context_filter`.
+* Search in Docs: fast search that returns the 4 most related text
+  chunks, together with their source document and page.
+  * Makes use of `/chunks` API with no `context_filter`, `limit=4` and
+    `prev_next_chunks=0`.
 * LLM Chat: simple, non-contextual chat with the LLM. The ingested documents won't
   be taken into account, only the previous messages.
   * Makes use of `/chat/completions` API with `use_context=false`.
-* Context Chunks: returns the JSON representation of the 2 most related text
-  chunks, together with their metadata, source document and previous and next
-  chunks.
-  * Makes use of `/chunks` API with no `context_filter`, `limit=2` and
-    `prev_next_chunks=1`.
 
 ### Document Ingestion
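For reference, a minimal sketch (not part of the commit) of the `/chunks` call the new Search in Docs mode is documented to make. It assumes a locally running PrivateGPT server; the `http://localhost:8001` base URL, the `/v1` route prefix, and the response field names are assumptions to verify against your deployment:

```python
import requests

# Assumed default host/port for a local PrivateGPT server.
BASE_URL = "http://localhost:8001"

# Search in Docs per the docs above: no context_filter, limit=4, prev_next_chunks=0.
resp = requests.post(
    f"{BASE_URL}/v1/chunks",  # assumed route
    json={"text": "What is PrivateGPT?", "limit": 4, "prev_next_chunks": 0},
)
resp.raise_for_status()

# Print each chunk's source file, page, and a preview of its text
# (field names assumed from the API's response shape).
for chunk in resp.json()["data"]:
    meta = chunk["document"]["doc_metadata"] or {}
    print(meta.get("file_name"), meta.get("page_label"), "-", chunk["text"][:80])
```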
File diff suppressed because one or more lines are too long
@@ -1,5 +1,4 @@
 import itertools
-import json
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Any, TextIO
@@ -50,7 +49,7 @@ def _chat(message: str, history: list[list[str]], mode: str, *_: Any) -> Any:
     new_message = ChatMessage(content=message, role=MessageRole.USER)
     all_messages = [*build_history(), new_message]
     match mode:
-        case "Query Documents":
+        case "Query Docs":
             query_stream = chat_service.stream_chat(
                 messages=all_messages,
                 use_context=True,
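A hedged sketch of the HTTP request this mode corresponds to, per the docs above (`/chat/completions` with `use_context=true` and no `context_filter`); the base URL, route prefix, and OpenAI-style response shape are assumptions:

```python
import requests

resp = requests.post(
    "http://localhost:8001/v1/chat/completions",  # assumed default host/port and route
    json={
        "messages": [{"role": "user", "content": "Summarize the ingested documents."}],
        "use_context": True,  # answer from ingested documents, per the docs above
    },
)
resp.raise_for_status()

# Assuming an OpenAI-compatible response body.
print(resp.json()["choices"][0]["message"]["content"])
```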
@@ -64,16 +63,16 @@ def _chat(message: str, history: list[list[str]], mode: str, *_: Any) -> Any:
             )
             yield from yield_deltas(llm_stream)
 
-        case "Context Chunks":
+        case "Search in Docs":
             response = chunks_service.retrieve_relevant(
-                text=message,
-                limit=2,
-                prev_next_chunks=1,
-            ).__iter__()
-            yield "```" + json.dumps(
-                [node.__dict__ for node in response],
-                default=lambda o: o.__dict__,
-                indent=2,
+                text=message, limit=4, prev_next_chunks=0
             )
+
+            yield "\n\n\n".join(
+                f"{index}. **{chunk.document.doc_metadata['file_name'] if chunk.document.doc_metadata else ''} "
+                f"(page {chunk.document.doc_metadata['page_label'] if chunk.document.doc_metadata else ''})**\n "
+                f"{chunk.text}"
+                for index, chunk in enumerate(response, start=1)
+            )
 
 
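To see what the new case yields, here is a standalone, hypothetical re-creation of the formatting expression with stub `Chunk`/`Document` types (the real ones live in PrivateGPT; these stubs only mirror the fields the diff touches):

```python
from dataclasses import dataclass, field

@dataclass
class Document:
    doc_metadata: dict | None = None  # stub for PrivateGPT's document metadata

@dataclass
class Chunk:
    text: str
    document: Document = field(default_factory=Document)

def format_chunks(chunks: list[Chunk]) -> str:
    # Number each chunk and show its source file and page in bold markdown,
    # the same shape the new "Search in Docs" case yields above.
    return "\n\n\n".join(
        f"{i}. **{c.document.doc_metadata['file_name'] if c.document.doc_metadata else ''} "
        f"(page {c.document.doc_metadata['page_label'] if c.document.doc_metadata else ''})**\n "
        f"{c.text}"
        for i, c in enumerate(chunks, start=1)
    )

# Example: one chunk renders as "1. **intro.pdf (page 1)** ..."
print(format_chunks([
    Chunk("PrivateGPT runs fully offline.", Document({"file_name": "intro.pdf", "page_label": "1"}))
]))
```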
@@ -117,9 +116,9 @@ with gr.Blocks(
     with gr.Row():
         with gr.Column(scale=3, variant="compact"):
             mode = gr.Radio(
-                ["Query Documents", "LLM Chat", "Context Chunks"],
+                ["Query Docs", "Search in Docs", "LLM Chat"],
                 label="Mode",
-                value="Query Documents",
+                value="Query Docs",
             )
             upload_button = gr.components.UploadButton(
                 "Upload a File",
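Finally, a self-contained Gradio sketch (hypothetical, not the project's UI module) showing the same pattern the diff uses: a `gr.Radio` mode selector dispatched through `match`; the stub replies just name the API call each mode maps to per the docs above:

```python
import gradio as gr

def chat(message: str, history: list, mode: str) -> str:
    # Dispatch on the selected mode, mirroring _chat in the diff above.
    match mode:
        case "Query Docs":
            return "[would call /chat/completions with use_context=true]"
        case "Search in Docs":
            return "[would call /chunks with limit=4 and prev_next_chunks=0]"
        case _:  # "LLM Chat"
            return "[would call /chat/completions with use_context=false]"

demo = gr.ChatInterface(
    chat,
    additional_inputs=[
        gr.Radio(["Query Docs", "Search in Docs", "LLM Chat"], label="Mode", value="Query Docs")
    ],
)

if __name__ == "__main__":
    demo.launch()
```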