From 8119842ae6f1f5ecfaf42b06fa0d1ffec675def4 Mon Sep 17 00:00:00 2001
From: Javier Martinez <javiermartinezalvarez98@gmail.com>
Date: Wed, 31 Jul 2024 16:53:27 +0200
Subject: [PATCH] feat(recipe): add our first recipe `Summarize` (#2028)

* feat: add summary recipe

* test: add summary tests

* docs: move all recipes docs

* docs: add recipes and summarize doc

* docs: update openapi reference

* refactor: split method into two methods (summary)

* feat: add initial summarize ui

* feat: add mode explanation

* fix: mypy

* feat: allow configuring the async property in summarize

* refactor: move modes to enum and update mode explanations

* docs: fix url

* docs: remove list-llm pages

* docs: remove double header

* fix: summary description
---
 fern/docs.yml                                 |  12 +-
 fern/docs/pages/recipes/list-llm.mdx          | 122 -------------
 fern/docs/pages/recipes/quickstart.mdx        |  23 +++
 fern/docs/pages/recipes/summarize.mdx         |  20 ++
 fern/openapi/openapi.json                     | 158 ++++++++++++++++
 private_gpt/launcher.py                       |   4 +-
 .../server/recipes/summarize/__init__.py      |   0
 .../recipes/summarize/summarize_router.py     |  86 +++++++++
 .../recipes/summarize/summarize_service.py    | 172 ++++++++++++++++++
 private_gpt/settings/settings.py              |  12 ++
 private_gpt/ui/ui.py                          | 113 ++++++++++--
 settings.yaml                                 |  10 +
 tests/server/recipes/test_summarize_router.py | 159 ++++++++++++++++
 13 files changed, 743 insertions(+), 148 deletions(-)
 delete mode 100644 fern/docs/pages/recipes/list-llm.mdx
 create mode 100644 fern/docs/pages/recipes/quickstart.mdx
 create mode 100644 fern/docs/pages/recipes/summarize.mdx
 create mode 100644 private_gpt/server/recipes/summarize/__init__.py
 create mode 100644 private_gpt/server/recipes/summarize/summarize_router.py
 create mode 100644 private_gpt/server/recipes/summarize/summarize_service.py
 create mode 100644 tests/server/recipes/test_summarize_router.py

diff --git a/fern/docs.yml b/fern/docs.yml
index 9dae9340..e0a5c423 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -74,14 +74,16 @@ navigation:
             path: ./docs/pages/ui/gradio.mdx
           - page: Alternatives
             path: ./docs/pages/ui/alternatives.mdx
-  # Small code snippet or example of usage to help users
   - tab: recipes
     layout:
-      - section: Choice of LLM
+      - section: Getting started
         contents:
-          # TODO: add recipes
-          - page: List of LLMs
-            path: ./docs/pages/recipes/list-llm.mdx
+          - page: Quickstart
+            path: ./docs/pages/recipes/quickstart.mdx
+      - section: General use cases
+        contents:
+          - page: Summarize
+            path: ./docs/pages/recipes/summarize.mdx
   # More advanced usage of PrivateGPT, by API
   - tab: api-reference
     layout:
diff --git a/fern/docs/pages/recipes/list-llm.mdx b/fern/docs/pages/recipes/list-llm.mdx
deleted file mode 100644
index 103867a0..00000000
--- a/fern/docs/pages/recipes/list-llm.mdx
+++ /dev/null
@@ -1,122 +0,0 @@
-# List of working LLM
-
-**Do you have any working combination of LLM and embeddings?**
-
-Please open a PR to add it to the list, and come on our Discord to tell us about it!
-
-## Prompt style
-
-LLMs might have been trained with different prompt styles.
-The prompt style is the way the prompt is written, and how the system message is injected in the prompt.
-
-For example, `llama2` looks like this:
-```text
-<s>[INST] <<SYS>>
-{{ system_prompt }}
-<</SYS>>
-
-{{ user_message }} [/INST]
-```
-
-While `default` (the `llama_index` default) looks like this:
-```text
-system: {{ system_prompt }}
-user: {{ user_message }}
-assistant: {{ assistant_message }}
-```
-
-The "`tag`" style looks like this:
-
-```text
-<|system|>: {{ system_prompt }}
-<|user|>: {{ user_message }}
-<|assistant|>: {{ assistant_message }}
-```
-
-The "`mistral`" style looks like this: 
-
-```text 
-<s>[INST] You are an AI assistant. [/INST]</s>[INST] Hello, how are you doing? [/INST]
-```
-
-The "`chatml`" style looks like this: 
-```text
-<|im_start|>system
-{{ system_prompt }}<|im_end|>
-<|im_start|>user"
-{{ user_message }}<|im_end|>
-<|im_start|>assistant
-{{ assistant_message }}
-```
-
-Some LLMs will not understand these prompt styles, and will not work (returning nothing).
-You can try to change the prompt style to `default` (or `tag`) in the settings, and it will
-change the way the messages are formatted to be passed to the LLM.
-
-## Example of configuration
-
-You might want to change the prompt depending on the language and model you are using.
-
-### English, with instructions
-
-`settings-en.yaml`:
-```yml
-local:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
-  prompt_style: "llama2"
-```
-
-### French, with instructions
-
-`settings-fr.yaml`:
-```yml
-local:
-  llm_hf_repo_id: TheBloke/Vigogne-2-7B-Instruct-GGUF
-  llm_hf_model_file: vigogne-2-7b-instruct.Q4_K_M.gguf
-  embedding_hf_model_name: dangvantuan/sentence-camembert-base
-  prompt_style: "default"
-  # prompt_style: "tag" # also works
-  # The default system prompt is injected only when the `prompt_style` != default, and there are no system message in the discussion
-  # default_system_prompt: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français.
-```
-
-You might want to change the prompt as the one above might not directly answer your question.
-You can read online about how to write a good prompt, but in a nutshell, make it (extremely) directive.
-
-You can try and troubleshot your prompt by writing multiline requests in the UI, while
-writing your interaction with the model, for example:
-
-```text
-Tu es un programmeur senior qui programme en python et utilise le framework fastapi. Ecrit moi un serveur qui retourne "hello world".
-```
-
-Another example:
-```text
-Context: None
-Situation: tu es au milieu d'un champ.
-Tache: va a la rivière, en bas du champ.
-Décrit comment aller a la rivière.
-```
-
-### Optimised Models
-GodziLLa2-70B LLM (English, rank 2 on HuggingFace OpenLLM Leaderboard), bge large Embedding Model (rank 1 on HuggingFace MTEB Leaderboard)
-`settings-optimised.yaml`:
-```yml
-local:
-  llm_hf_repo_id: TheBloke/GodziLLa2-70B-GGUF
-  llm_hf_model_file: godzilla2-70b.Q4_K_M.gguf
-  embedding_hf_model_name: BAAI/bge-large-en
-  prompt_style: "llama2"
-```
-### German speaking model
-`settings-de.yaml`:
-```yml
-local:
-  llm_hf_repo_id: TheBloke/em_german_leo_mistral-GGUF
-  llm_hf_model_file:   em_german_leo_mistral.Q4_K_M.gguf
-  embedding_hf_model_name: T-Systems-onsite/german-roberta-sentence-transformer-v2
-  #llama, default or tag
-  prompt_style: "default"
-```
diff --git a/fern/docs/pages/recipes/quickstart.mdx b/fern/docs/pages/recipes/quickstart.mdx
new file mode 100644
index 00000000..a0f6c877
--- /dev/null
+++ b/fern/docs/pages/recipes/quickstart.mdx
@@ -0,0 +1,23 @@
+# Recipes
+
+Recipes are predefined use cases that help users solve very specific tasks using PrivateGPT.
+They provide a streamlined approach to achieve common goals with the platform, offering both a starting point and inspiration for further exploration.
+The main goal of Recipes is to empower the community to create and share solutions, expanding the capabilities of PrivateGPT.
+
+## How to Create a New Recipe
+
+1. **Identify the Task**: Define a specific task or problem that the Recipe will address.
+2. **Develop the Solution**: Create a clear and concise guide, including any necessary code snippets or configurations (a minimal sketch of a recipe's API layer is shown below).
+3. **Submit a PR**: Fork the PrivateGPT repository, add your Recipe to the appropriate section, and submit a PR for review.
+
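+As a reference, the sketch below shows the typical shape of a recipe's API layer, modeled on the
+`Summarize` recipe that ships with PrivateGPT. The `echo` names are purely illustrative; a real
+recipe would add a matching service and register its router in `private_gpt/launcher.py`.
+
+```python
+# Hypothetical skeleton of a new recipe's router, modeled on the Summarize recipe.
+# The `echo` names are illustrative only.
+from fastapi import APIRouter, Depends, Request
+from pydantic import BaseModel
+
+from private_gpt.server.utils.auth import authenticated
+
+echo_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
+
+
+class EchoBody(BaseModel):
+    text: str
+
+
+class EchoResponse(BaseModel):
+    echo: str
+
+
+@echo_router.post("/echo", tags=["Recipes"])
+def echo(request: Request, body: EchoBody) -> EchoResponse:
+    # A real recipe would resolve its service from the injector here,
+    # e.g. request.state.injector.get(MyRecipeService).
+    return EchoResponse(echo=body.text)
+```
+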
+We encourage you to be creative and think outside the box! Your contributions help shape the future of PrivateGPT.
+
+## Available Recipes
+
+<Cards>
+  <Card
+    title="Summarize"
+    icon="fa-solid fa-file-alt"
+    href="/recipes/general-use-cases/summarize"
+  />
+</Cards>
diff --git a/fern/docs/pages/recipes/summarize.mdx b/fern/docs/pages/recipes/summarize.mdx
new file mode 100644
index 00000000..99594bf1
--- /dev/null
+++ b/fern/docs/pages/recipes/summarize.mdx
@@ -0,0 +1,20 @@
+The Summarize Recipe provides a method to extract concise summaries from ingested documents or texts using PrivateGPT.
+This tool is particularly useful for quickly understanding large volumes of information by distilling key points and main ideas.
+
+## Use Case
+
+The primary use case for the `Summarize` tool is to automate the summarization of lengthy documents,
+making it easier for users to grasp the essential information without reading through entire texts.
+This can be applied in various scenarios, such as summarizing research papers, news articles, or business reports.
+
+## Key Features
+
+1. **Ingestion-compatible**: The user provides the text to be summarized. The text can be provided directly or retrieved from ingested documents within the system.
+2. **Customization**: The summary generation can be influenced by providing specific `instructions` or a `prompt`. These inputs guide the model on how to frame the summary, allowing for customization according to user needs.
+3. **Streaming Support**: The tool supports streaming, allowing for real-time summary generation, which can be particularly useful for handling large texts or providing immediate feedback (see the usage example below).
+
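+## Usage Example
+
+Below is a minimal sketch of calling the endpoint with Python's `requests` library. It assumes
+PrivateGPT is reachable at `http://localhost:8001` (the default `server` configuration); adjust
+the base URL and payload to your setup. All request fields (`text`, `instructions`, `use_context`,
+`context_filter`, `prompt`, `stream`) are optional and map directly to the `SummarizeBody` schema.
+
+```python
+# Minimal sketch: calling the /v1/summarize endpoint over HTTP.
+# The base URL is an assumption; adjust it to your `server` settings.
+import json
+
+import requests
+
+BASE_URL = "http://localhost:8001"
+
+# Non-streaming call: the API returns {"summary": "..."} in a single response.
+response = requests.post(
+    f"{BASE_URL}/v1/summarize",
+    json={
+        "text": "A long piece of text to condense...",
+        "instructions": "Keep the summary under three sentences.",
+        # Set "use_context": True (optionally with a "context_filter")
+        # to summarize ingested documents instead of raw text.
+    },
+    timeout=120,
+)
+response.raise_for_status()
+print(response.json()["summary"])
+
+# Streaming call: the API emits OpenAI-style "completion.chunk" events over SSE.
+with requests.post(
+    f"{BASE_URL}/v1/summarize",
+    json={"text": "A long piece of text to condense...", "stream": True},
+    stream=True,
+    timeout=120,
+) as stream_response:
+    for line in stream_response.iter_lines():
+        if not line or not line.startswith(b"data: "):
+            continue
+        data = line.removeprefix(b"data: ")
+        if data == b"[DONE]":
+            break
+        chunk = json.loads(data)
+        print(chunk["choices"][0]["delta"].get("content", ""), end="")
+```
+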
+## Contributing
+
+If you have ideas for improving the Summarize recipe or want to add new features, feel free to contribute!
+You can submit your enhancements via a pull request on our [GitHub repository](https://github.com/zylon-ai/private-gpt).
+
diff --git a/fern/openapi/openapi.json b/fern/openapi/openapi.json
index af1646f1..c17c4d12 100644
--- a/fern/openapi/openapi.json
+++ b/fern/openapi/openapi.json
@@ -339,6 +339,48 @@
         }
       }
     },
+    "/v1/summarize": {
+      "post": {
+        "tags": [
+          "Recipes"
+        ],
+        "summary": "Summarize",
+        "description": "Given a text, the model will return a summary.\n\nOptionally include `instructions` to influence the way the summary is generated.\n\nIf `use_context`\nis set to `true`, the model will also use the content coming from the ingested\ndocuments in the summary. The documents being used can\nbe filtered by their metadata using the `context_filter`.\nIngested documents metadata can be found using `/ingest/list` endpoint.\nIf you want all ingested documents to be used, remove `context_filter` altogether.\n\nIf `prompt` is set, it will be used as the prompt for the summarization,\notherwise the default prompt will be used.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
+        "operationId": "summarize_v1_summarize_post",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/SummarizeBody"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/SummarizeResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
     "/v1/embeddings": {
       "post": {
         "tags": [
@@ -500,6 +542,10 @@
       "Chunk": {
         "properties": {
           "object": {
+            "type": "string",
+            "enum": [
+              "context.chunk"
+            ],
             "const": "context.chunk",
             "title": "Object"
           },
@@ -612,10 +658,18 @@
       "ChunksResponse": {
         "properties": {
           "object": {
+            "type": "string",
+            "enum": [
+              "list"
+            ],
             "const": "list",
             "title": "Object"
           },
           "model": {
+            "type": "string",
+            "enum": [
+              "private-gpt"
+            ],
             "const": "private-gpt",
             "title": "Model"
           },
@@ -728,6 +782,10 @@
             "title": "Index"
           },
           "object": {
+            "type": "string",
+            "enum": [
+              "embedding"
+            ],
             "const": "embedding",
             "title": "Object"
           },
@@ -779,10 +837,18 @@
       "EmbeddingsResponse": {
         "properties": {
           "object": {
+            "type": "string",
+            "enum": [
+              "list"
+            ],
             "const": "list",
             "title": "Object"
           },
           "model": {
+            "type": "string",
+            "enum": [
+              "private-gpt"
+            ],
             "const": "private-gpt",
             "title": "Model"
           },
@@ -818,6 +884,10 @@
       "HealthResponse": {
         "properties": {
           "status": {
+            "type": "string",
+            "enum": [
+              "ok"
+            ],
             "const": "ok",
             "title": "Status",
             "default": "ok"
@@ -829,10 +899,18 @@
       "IngestResponse": {
         "properties": {
           "object": {
+            "type": "string",
+            "enum": [
+              "list"
+            ],
             "const": "list",
             "title": "Object"
           },
           "model": {
+            "type": "string",
+            "enum": [
+              "private-gpt"
+            ],
             "const": "private-gpt",
             "title": "Model"
           },
@@ -879,6 +957,10 @@
       "IngestedDoc": {
         "properties": {
           "object": {
+            "type": "string",
+            "enum": [
+              "ingest.document"
+            ],
             "const": "ingest.document",
             "title": "Object"
           },
@@ -1001,6 +1083,10 @@
             ]
           },
           "model": {
+            "type": "string",
+            "enum": [
+              "private-gpt"
+            ],
             "const": "private-gpt",
             "title": "Model"
           },
@@ -1074,6 +1160,78 @@
         "title": "OpenAIMessage",
         "description": "Inference result, with the source of the message.\n\nRole could be the assistant or system\n(providing a default response, not AI generated)."
       },
+      "SummarizeBody": {
+        "properties": {
+          "text": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Text"
+          },
+          "use_context": {
+            "type": "boolean",
+            "title": "Use Context",
+            "default": false
+          },
+          "context_filter": {
+            "anyOf": [
+              {
+                "$ref": "#/components/schemas/ContextFilter"
+              },
+              {
+                "type": "null"
+              }
+            ]
+          },
+          "prompt": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Prompt"
+          },
+          "instructions": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Instructions"
+          },
+          "stream": {
+            "type": "boolean",
+            "title": "Stream",
+            "default": false
+          }
+        },
+        "type": "object",
+        "title": "SummarizeBody"
+      },
+      "SummarizeResponse": {
+        "properties": {
+          "summary": {
+            "type": "string",
+            "title": "Summary"
+          }
+        },
+        "type": "object",
+        "required": [
+          "summary"
+        ],
+        "title": "SummarizeResponse"
+      },
       "ValidationError": {
         "properties": {
           "loc": {
diff --git a/private_gpt/launcher.py b/private_gpt/launcher.py
index 2245d56e..968baae4 100644
--- a/private_gpt/launcher.py
+++ b/private_gpt/launcher.py
@@ -15,6 +15,7 @@ from private_gpt.server.completions.completions_router import completions_router
 from private_gpt.server.embeddings.embeddings_router import embeddings_router
 from private_gpt.server.health.health_router import health_router
 from private_gpt.server.ingest.ingest_router import ingest_router
+from private_gpt.server.recipes.summarize.summarize_router import summarize_router
 from private_gpt.settings.settings import Settings
 
 logger = logging.getLogger(__name__)
@@ -32,12 +33,13 @@ def create_app(root_injector: Injector) -> FastAPI:
     app.include_router(chat_router)
     app.include_router(chunks_router)
     app.include_router(ingest_router)
+    app.include_router(summarize_router)
     app.include_router(embeddings_router)
     app.include_router(health_router)
 
     # Add LlamaIndex simple observability
     global_handler = create_global_handler("simple")
-    if global_handler is not None:
+    if global_handler:
         LlamaIndexSettings.callback_manager = CallbackManager([global_handler])
 
     settings = root_injector.get(Settings)
diff --git a/private_gpt/server/recipes/summarize/__init__.py b/private_gpt/server/recipes/summarize/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/private_gpt/server/recipes/summarize/summarize_router.py b/private_gpt/server/recipes/summarize/summarize_router.py
new file mode 100644
index 00000000..c1770c3c
--- /dev/null
+++ b/private_gpt/server/recipes/summarize/summarize_router.py
@@ -0,0 +1,86 @@
+from fastapi import APIRouter, Depends, Request
+from pydantic import BaseModel
+from starlette.responses import StreamingResponse
+
+from private_gpt.open_ai.extensions.context_filter import ContextFilter
+from private_gpt.open_ai.openai_models import (
+    to_openai_sse_stream,
+)
+from private_gpt.server.recipes.summarize.summarize_service import SummarizeService
+from private_gpt.server.utils.auth import authenticated
+
+summarize_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
+
+
+class SummarizeBody(BaseModel):
+    text: str | None = None
+    use_context: bool = False
+    context_filter: ContextFilter | None = None
+    prompt: str | None = None
+    instructions: str | None = None
+    stream: bool = False
+
+
+class SummarizeResponse(BaseModel):
+    summary: str
+
+
+@summarize_router.post(
+    "/summarize",
+    response_model=None,
+    summary="Summarize",
+    responses={200: {"model": SummarizeResponse}},
+    tags=["Recipes"],
+)
+def summarize(
+    request: Request, body: SummarizeBody
+) -> SummarizeResponse | StreamingResponse:
+    """Given a text, the model will return a summary.
+
+    Optionally include `instructions` to influence the way the summary is generated.
+
+    If `use_context`
+    is set to `true`, the model will also use the content coming from the ingested
+    documents in the summary. The documents being used can
+    be filtered by their metadata using the `context_filter`.
+    Ingested documents metadata can be found using `/ingest/list` endpoint.
+    If you want all ingested documents to be used, remove `context_filter` altogether.
+
+    If `prompt` is set, it will be used as the prompt for the summarization,
+    otherwise the default prompt will be used.
+
+    When using `'stream': true`, the API will return data chunks following [OpenAI's
+    streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):
+    ```
+    {"id":"12345","object":"completion.chunk","created":1694268190,
+    "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"},
+    "finish_reason":null}]}
+    ```
+    """
+    service: SummarizeService = request.state.injector.get(SummarizeService)
+
+    if body.stream:
+        completion_gen = service.stream_summarize(
+            text=body.text,
+            instructions=body.instructions,
+            use_context=body.use_context,
+            context_filter=body.context_filter,
+            prompt=body.prompt,
+        )
+        return StreamingResponse(
+            to_openai_sse_stream(
+                response_generator=completion_gen,
+            ),
+            media_type="text/event-stream",
+        )
+    else:
+        completion = service.summarize(
+            text=body.text,
+            instructions=body.instructions,
+            use_context=body.use_context,
+            context_filter=body.context_filter,
+            prompt=body.prompt,
+        )
+        return SummarizeResponse(
+            summary=completion,
+        )
diff --git a/private_gpt/server/recipes/summarize/summarize_service.py b/private_gpt/server/recipes/summarize/summarize_service.py
new file mode 100644
index 00000000..4bfd18f5
--- /dev/null
+++ b/private_gpt/server/recipes/summarize/summarize_service.py
@@ -0,0 +1,172 @@
+from itertools import chain
+
+from injector import inject, singleton
+from llama_index.core import (
+    Document,
+    StorageContext,
+    SummaryIndex,
+)
+from llama_index.core.base.response.schema import Response, StreamingResponse
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.response_synthesizers import ResponseMode
+from llama_index.core.storage.docstore.types import RefDocInfo
+from llama_index.core.types import TokenGen
+
+from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.llm.llm_component import LLMComponent
+from private_gpt.components.node_store.node_store_component import NodeStoreComponent
+from private_gpt.components.vector_store.vector_store_component import (
+    VectorStoreComponent,
+)
+from private_gpt.open_ai.extensions.context_filter import ContextFilter
+from private_gpt.settings.settings import Settings
+
+DEFAULT_SUMMARIZE_PROMPT = (
+    "Provide a comprehensive summary of the provided context information. "
+    "The summary should cover all the key points and main ideas presented in "
+    "the original text, while also condensing the information into a concise "
+    "and easy-to-understand format. Please ensure that the summary includes "
+    "relevant details and examples that support the main ideas, while avoiding "
+    "any unnecessary information or repetition."
+)
+
+
+@singleton
+class SummarizeService:
+    @inject
+    def __init__(
+        self,
+        settings: Settings,
+        llm_component: LLMComponent,
+        node_store_component: NodeStoreComponent,
+        vector_store_component: VectorStoreComponent,
+        embedding_component: EmbeddingComponent,
+    ) -> None:
+        self.settings = settings
+        self.llm_component = llm_component
+        self.node_store_component = node_store_component
+        self.vector_store_component = vector_store_component
+        self.embedding_component = embedding_component
+        self.storage_context = StorageContext.from_defaults(
+            vector_store=vector_store_component.vector_store,
+            docstore=node_store_component.doc_store,
+            index_store=node_store_component.index_store,
+        )
+
+    @staticmethod
+    def _filter_ref_docs(
+        ref_docs: dict[str, RefDocInfo], context_filter: ContextFilter | None
+    ) -> list[RefDocInfo]:
+        if context_filter is None or not context_filter.docs_ids:
+            return list(ref_docs.values())
+
+        return [
+            ref_doc
+            for doc_id, ref_doc in ref_docs.items()
+            if doc_id in context_filter.docs_ids
+        ]
+
+    def _summarize(
+        self,
+        use_context: bool = False,
+        stream: bool = False,
+        text: str | None = None,
+        instructions: str | None = None,
+        context_filter: ContextFilter | None = None,
+        prompt: str | None = None,
+    ) -> str | TokenGen:
+
+        nodes_to_summarize = []
+
+        # Add text to summarize
+        if text:
+            text_documents = [Document(text=text)]
+            nodes_to_summarize += (
+                SentenceSplitter.from_defaults().get_nodes_from_documents(
+                    text_documents
+                )
+            )
+
+        # Add context documents to summarize
+        if use_context:
+            # 1. Recover all ref docs
+            ref_docs: dict[
+                str, RefDocInfo
+            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            if ref_docs is None:
+                raise ValueError("No documents have been ingested yet.")
+
+            # 2. Filter documents based on context_filter (if provided)
+            filtered_ref_docs = self._filter_ref_docs(ref_docs, context_filter)
+
+            # 3. Get all nodes from the filtered documents
+            filtered_node_ids = chain.from_iterable(
+                [ref_doc.node_ids for ref_doc in filtered_ref_docs]
+            )
+            filtered_nodes = self.storage_context.docstore.get_nodes(
+                node_ids=list(filtered_node_ids),
+            )
+
+            nodes_to_summarize += filtered_nodes
+
+        # Create a SummaryIndex to summarize the nodes
+        summary_index = SummaryIndex(
+            nodes=nodes_to_summarize,
+            storage_context=StorageContext.from_defaults(),  # In memory SummaryIndex
+            show_progress=True,
+        )
+
+        # Make a tree summarization query
+        # above the set of all candidate nodes
+        query_engine = summary_index.as_query_engine(
+            llm=self.llm_component.llm,
+            response_mode=ResponseMode.TREE_SUMMARIZE,
+            streaming=stream,
+            use_async=self.settings.summarize.use_async,
+        )
+
+        prompt = prompt or DEFAULT_SUMMARIZE_PROMPT
+
+        summarize_query = prompt + "\n" + (instructions or "")
+
+        response = query_engine.query(summarize_query)
+        if isinstance(response, Response):
+            return response.response or ""
+        elif isinstance(response, StreamingResponse):
+            return response.response_gen
+        else:
+            raise TypeError(f"The result is not of a supported type: {type(response)}")
+
+    def summarize(
+        self,
+        use_context: bool = False,
+        text: str | None = None,
+        instructions: str | None = None,
+        context_filter: ContextFilter | None = None,
+        prompt: str | None = None,
+    ) -> str:
+        return self._summarize(
+            use_context=use_context,
+            stream=False,
+            text=text,
+            instructions=instructions,
+            context_filter=context_filter,
+            prompt=prompt,
+        )  # type: ignore
+
+    def stream_summarize(
+        self,
+        use_context: bool = False,
+        text: str | None = None,
+        instructions: str | None = None,
+        context_filter: ContextFilter | None = None,
+        prompt: str | None = None,
+    ) -> TokenGen:
+        return self._summarize(
+            use_context=use_context,
+            stream=True,
+            text=text,
+            instructions=instructions,
+            context_filter=context_filter,
+            prompt=prompt,
+        )  # type: ignore
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 8ed7a5a8..c968f808 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -353,6 +353,10 @@ class UISettings(BaseModel):
     default_query_system_prompt: str = Field(
         None, description="The default system prompt to use for the query mode."
     )
+    default_summarization_system_prompt: str = Field(
+        None,
+        description="The default system prompt to use for the summarization mode.",
+    )
     delete_file_button_enabled: bool = Field(
         True, description="If the button to delete a file is enabled or not."
     )
@@ -388,6 +392,13 @@ class RagSettings(BaseModel):
     rerank: RerankSettings
 
 
+class SummarizeSettings(BaseModel):
+    use_async: bool = Field(
+        True,
+        description="If set to True, the summarization will be done asynchronously.",
+    )
+
+
 class ClickHouseSettings(BaseModel):
     host: str = Field(
         "localhost",
@@ -577,6 +588,7 @@ class Settings(BaseModel):
     vectorstore: VectorstoreSettings
     nodestore: NodeStoreSettings
     rag: RagSettings
+    summarize: SummarizeSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None
     clickhouse: ClickHouseSettings | None = None
diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py
index d621bd28..0bf06d19 100644
--- a/private_gpt/ui/ui.py
+++ b/private_gpt/ui/ui.py
@@ -3,6 +3,7 @@ import base64
 import logging
 import time
 from collections.abc import Iterable
+from enum import Enum
 from pathlib import Path
 from typing import Any
 
@@ -11,6 +12,7 @@ from fastapi import FastAPI
 from gradio.themes.utils.colors import slate  # type: ignore
 from injector import inject, singleton
 from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
+from llama_index.core.types import TokenGen
 from pydantic import BaseModel
 
 from private_gpt.constants import PROJECT_ROOT_PATH
@@ -19,6 +21,7 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chat.chat_service import ChatService, CompletionGen
 from private_gpt.server.chunks.chunks_service import Chunk, ChunksService
 from private_gpt.server.ingest.ingest_service import IngestService
+from private_gpt.server.recipes.summarize.summarize_service import SummarizeService
 from private_gpt.settings.settings import settings
 from private_gpt.ui.images import logo_svg
 
@@ -32,7 +35,20 @@ UI_TAB_TITLE = "My Private GPT"
 
 SOURCES_SEPARATOR = "<hr>Sources: \n"
 
-MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"]
+
+class Modes(str, Enum):
+    RAG_MODE = "RAG"
+    SEARCH_MODE = "Search"
+    BASIC_CHAT_MODE = "Basic"
+    SUMMARIZE_MODE = "Summarize"
+
+
+MODES: list[Modes] = [
+    Modes.RAG_MODE,
+    Modes.SEARCH_MODE,
+    Modes.BASIC_CHAT_MODE,
+    Modes.SUMMARIZE_MODE,
+]
 
 
 class Source(BaseModel):
@@ -70,10 +86,12 @@ class PrivateGptUi:
         ingest_service: IngestService,
         chat_service: ChatService,
         chunks_service: ChunksService,
+        summarize_service: SummarizeService,
     ) -> None:
         self._ingest_service = ingest_service
         self._chat_service = chat_service
         self._chunks_service = chunks_service
+        self._summarize_service = summarize_service
 
         # Cache the UI blocks
         self._ui_block = None
@@ -84,7 +102,9 @@ class PrivateGptUi:
         self.mode = MODES[0]
         self._system_prompt = self._get_default_system_prompt(self.mode)
 
-    def _chat(self, message: str, history: list[list[str]], mode: str, *_: Any) -> Any:
+    def _chat(
+        self, message: str, history: list[list[str]], mode: Modes, *_: Any
+    ) -> Any:
         def yield_deltas(completion_gen: CompletionGen) -> Iterable[str]:
             full_response: str = ""
             stream = completion_gen.response
@@ -112,6 +132,12 @@ class PrivateGptUi:
                 full_response += sources_text
             yield full_response
 
+        def yield_tokens(token_gen: TokenGen) -> Iterable[str]:
+            full_response: str = ""
+            for token in token_gen:
+                full_response += str(token)
+                yield full_response
+
         def build_history() -> list[ChatMessage]:
             history_messages: list[ChatMessage] = []
 
@@ -143,8 +169,7 @@ class PrivateGptUi:
                 ),
             )
         match mode:
-            case "Query Files":
-
+            case Modes.RAG_MODE:
                 # Use only the selected file for the query
                 context_filter = None
                 if self._selected_filename is not None:
@@ -163,14 +188,14 @@ class PrivateGptUi:
                     context_filter=context_filter,
                 )
                 yield from yield_deltas(query_stream)
-            case "LLM Chat (no context from files)":
+            case Modes.BASIC_CHAT_MODE:
                 llm_stream = self._chat_service.stream_chat(
                     messages=all_messages,
                     use_context=False,
                 )
                 yield from yield_deltas(llm_stream)
 
-            case "Search Files":
+            case Modes.SEARCH_MODE:
                 response = self._chunks_service.retrieve_relevant(
                     text=message, limit=4, prev_next_chunks=0
                 )
@@ -183,37 +208,76 @@ class PrivateGptUi:
                     f"{source.text}"
                     for index, source in enumerate(sources, start=1)
                 )
+            case Modes.SUMMARIZE_MODE:
+                # Summarize the given message, optionally using selected files
+                context_filter = None
+                if self._selected_filename:
+                    docs_ids = []
+                    for ingested_document in self._ingest_service.list_ingested():
+                        if (
+                            ingested_document.doc_metadata["file_name"]
+                            == self._selected_filename
+                        ):
+                            docs_ids.append(ingested_document.doc_id)
+                    context_filter = ContextFilter(docs_ids=docs_ids)
+
+                summary_stream = self._summarize_service.stream_summarize(
+                    use_context=True,
+                    context_filter=context_filter,
+                    instructions=message,
+                )
+                yield from yield_tokens(summary_stream)
 
     # On initialization and on mode change, this function set the system prompt
     # to the default prompt based on the mode (and user settings).
     @staticmethod
-    def _get_default_system_prompt(mode: str) -> str:
+    def _get_default_system_prompt(mode: Modes) -> str:
         p = ""
         match mode:
             # For query chat mode, obtain default system prompt from settings
-            case "Query Files":
+            case Modes.RAG_MODE:
                 p = settings().ui.default_query_system_prompt
             # For chat mode, obtain default system prompt from settings
-            case "LLM Chat (no context from files)":
+            case Modes.BASIC_CHAT_MODE:
                 p = settings().ui.default_chat_system_prompt
+            # For summarization mode, obtain default system prompt from settings
+            case Modes.SUMMARIZE_MODE:
+                p = settings().ui.default_summarization_system_prompt
             # For any other mode, clear the system prompt
             case _:
                 p = ""
         return p
 
+    @staticmethod
+    def _get_default_mode_explanation(mode: Modes) -> str:
+        match mode:
+            case Modes.RAG_MODE:
+                return "Get contextualized answers from selected files."
+            case Modes.SEARCH_MODE:
+                return "Find relevant chunks of text in selected files."
+            case Modes.BASIC_CHAT_MODE:
+                return "Chat with the LLM using its training data. Files are ignored."
+            case Modes.SUMMARIZE_MODE:
+                return "Generate a summary of the selected files. Prompt to customize the result."
+            case _:
+                return ""
+
     def _set_system_prompt(self, system_prompt_input: str) -> None:
         logger.info(f"Setting system prompt to: {system_prompt_input}")
         self._system_prompt = system_prompt_input
 
-    def _set_current_mode(self, mode: str) -> Any:
+    def _set_explanation_mode(self, explanation_mode: str) -> None:
+        self._explanation_mode = explanation_mode
+
+    def _set_current_mode(self, mode: Modes) -> Any:
         self.mode = mode
         self._set_system_prompt(self._get_default_system_prompt(mode))
-        # Update placeholder and allow interaction if default system prompt is set
-        if self._system_prompt:
-            return gr.update(placeholder=self._system_prompt, interactive=True)
-        # Update placeholder and disable interaction if no default system prompt is set
-        else:
-            return gr.update(placeholder=self._system_prompt, interactive=False)
+        self._set_explanation_mode(self._get_default_mode_explanation(mode))
+        interactive = self._system_prompt is not None
+        return [
+            gr.update(placeholder=self._system_prompt, interactive=interactive),
+            gr.update(value=self._explanation_mode),
+        ]
 
     def _list_ingested_files(self) -> list[list[str]]:
         files = set()
@@ -326,10 +390,17 @@ class PrivateGptUi:
 
             with gr.Row(equal_height=False):
                 with gr.Column(scale=3):
+                    default_mode = MODES[0]
                     mode = gr.Radio(
-                        MODES,
+                        [mode.value for mode in MODES],
                         label="Mode",
-                        value="Query Files",
+                        value=default_mode,
+                    )
+                    explanation_mode = gr.Textbox(
+                        placeholder=self._get_default_mode_explanation(default_mode),
+                        show_label=False,
+                        max_lines=3,
+                        interactive=False,
                     )
                     upload_button = gr.components.UploadButton(
                         "Upload File(s)",
@@ -413,9 +484,11 @@ class PrivateGptUi:
                         interactive=True,
                         render=False,
                     )
-                    # When mode changes, set default system prompt
+                    # When mode changes, set the default system prompt and the mode explanation
                     mode.change(
-                        self._set_current_mode, inputs=mode, outputs=system_prompt_input
+                        self._set_current_mode,
+                        inputs=mode,
+                        outputs=[system_prompt_input, explanation_mode],
                     )
                     # On blur, set system prompt to use in queries
                     system_prompt_input.blur(
diff --git a/settings.yaml b/settings.yaml
index 2c68bd6b..a890733e 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -34,6 +34,13 @@ ui:
     You can only answer questions about the provided context. 
     If you know the answer but it is not based in the provided context, don't provide 
     the answer, just state the answer is not in the context provided.
+  default_summarization_system_prompt: >
+    Provide a comprehensive summary of the provided context information. 
+    The summary should cover all the key points and main ideas presented in
+    the original text, while also condensing the information into a concise 
+    and easy-to-understand format. Please ensure that the summary includes
+    relevant details and examples that support the main ideas, while avoiding 
+    any unnecessary information or repetition.
   delete_file_button_enabled: true
   delete_all_files_button_enabled: true
 
@@ -57,6 +64,9 @@ rag:
     model: cross-encoder/ms-marco-MiniLM-L-2-v2
     top_n: 1
 
+summarize:
+  use_async: true
+
 clickhouse:
     host: localhost
     port: 8443
diff --git a/tests/server/recipes/test_summarize_router.py b/tests/server/recipes/test_summarize_router.py
new file mode 100644
index 00000000..8b647b07
--- /dev/null
+++ b/tests/server/recipes/test_summarize_router.py
@@ -0,0 +1,159 @@
+from fastapi.testclient import TestClient
+
+from private_gpt.server.recipes.summarize.summarize_router import (
+    SummarizeBody,
+    SummarizeResponse,
+)
+
+
+def test_summarize_route_produces_a_stream(test_client: TestClient) -> None:
+    body = SummarizeBody(
+        text="Test",
+        stream=True,
+    )
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    raw_events = response.text.split("\n\n")
+    events = [
+        item.removeprefix("data: ") for item in raw_events if item.startswith("data: ")
+    ]
+    assert response.status_code == 200
+    assert "text/event-stream" in response.headers["content-type"]
+    assert len(events) > 0
+    assert events[-1] == "[DONE]"
+
+
+def test_summarize_route_produces_a_single_value(test_client: TestClient) -> None:
+    body = SummarizeBody(
+        text="test",
+        stream=False,
+    )
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    # No asserts, if it validates it's good
+    SummarizeResponse.model_validate(response.json())
+    assert response.status_code == 200
+
+
+def test_summarize_with_document_context(test_client: TestClient) -> None:
+    # Ingest a document
+    ingest_response = test_client.post(
+        "/v1/ingest/text",
+        json={
+            "file_name": "file_name",
+            "text": "Lorem ipsum dolor sit amet",
+        },
+    )
+    assert ingest_response.status_code == 200
+    ingested_docs = ingest_response.json()["data"]
+    assert len(ingested_docs) == 1
+
+    body = SummarizeBody(
+        use_context=True,
+        context_filter={"docs_ids": [doc["doc_id"] for doc in ingested_docs]},
+        stream=False,
+    )
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
+    assert response.status_code == 200
+    # We can check the content of the completion because the mock LLM used in tests
+    # always echoes the prompt; for summarization, the input context is part of that prompt.
+    assert completion.summary.find("Lorem ipsum dolor sit amet") != -1
+
+
+def test_summarize_with_non_existent_document_context_not_fails(
+    test_client: TestClient,
+) -> None:
+    body = SummarizeBody(
+        use_context=True,
+        context_filter={
+            "docs_ids": ["non-existent-doc-id"],
+        },
+        stream=False,
+    )
+
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
+    assert response.status_code == 200
+    # We can check the content of the completion because the mock LLM used in tests
+    # always echoes the prompt; for summarization, the input context is part of that prompt.
+    assert completion.summary.find("Empty Response") != -1
+
+
+def test_summarize_with_metadata_and_document_context(test_client: TestClient) -> None:
+    docs = []
+
+    # Ingest a first document
+    document_1_content = "Content of document 1"
+    ingest_response = test_client.post(
+        "/v1/ingest/text",
+        json={
+            "file_name": "file_name_1",
+            "text": document_1_content,
+        },
+    )
+    assert ingest_response.status_code == 200
+    ingested_docs = ingest_response.json()["data"]
+    assert len(ingested_docs) == 1
+    docs += ingested_docs
+
+    # Ingest a second document
+    document_2_content = "Text of document 2"
+    ingest_response = test_client.post(
+        "/v1/ingest/text",
+        json={
+            "file_name": "file_name_2",
+            "text": document_2_content,
+        },
+    )
+    assert ingest_response.status_code == 200
+    ingested_docs = ingest_response.json()["data"]
+    assert len(ingested_docs) == 1
+    docs += ingested_docs
+
+    # Request a summary using both ingested documents
+    body = SummarizeBody(
+        use_context=True,
+        context_filter={"docs_ids": [doc["doc_id"] for doc in docs]},
+        stream=False,
+    )
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
+    assert response.status_code == 200
+    # Assert both documents are part of the used sources
+    # We can check the content of the completion because the mock LLM used in tests
+    # always echoes the prompt; for summarization, the input context is part of that prompt.
+    assert completion.summary.find(document_1_content) != -1
+    assert completion.summary.find(document_2_content) != -1
+
+
+def test_summarize_with_prompt(test_client: TestClient) -> None:
+    ingest_response = test_client.post(
+        "/v1/ingest/text",
+        json={
+            "file_name": "file_name",
+            "text": "Lorem ipsum dolor sit amet",
+        },
+    )
+    assert ingest_response.status_code == 200
+    ingested_docs = ingest_response.json()["data"]
+    assert len(ingested_docs) == 1
+
+    body = SummarizeBody(
+        use_context=True,
+        context_filter={
+            "docs_ids": [doc["doc_id"] for doc in ingested_docs],
+        },
+        prompt="This is a custom summary prompt, 54321",
+        stream=False,
+    )
+    response = test_client.post("/v1/summarize", json=body.model_dump())
+
+    completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
+    assert response.status_code == 200
+    # We can check the content of the completion because the mock LLM used in tests
+    # always echoes the prompt; for summarization, the input context is part of that prompt.
+    assert completion.summary.find("This is a custom summary prompt, 54321") != -1