mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-28 00:08:39 +00:00
feat(llm): Add support for Ollama LLM (#1526)
This commit is contained in:
parent
b178b51451
commit
6bbec79583
@ -102,3 +102,33 @@ or
|
||||
|
||||
When the server is started it will print a log *Application startup complete*.
|
||||
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
|
||||
|
||||
### Using Ollama
|
||||
|
||||
Another option for a fully private setup is using [Ollama](https://ollama.ai/).
|
||||
|
||||
Note: how to deploy Ollama and pull models onto it is out of the scope of this documentation.
|
||||
|
||||
In order to do so, create a profile `settings-ollama.yaml` with the following contents:
|
||||
|
||||
```yaml
|
||||
llm:
|
||||
mode: ollama
|
||||
|
||||
ollama:
|
||||
model: <ollama_model_to_use> # Required. Model to use.
|
||||
# Note: Ollama Models are listed here: https://ollama.ai/library
|
||||
# Be sure to pull the model to your Ollama server
|
||||
api_base: <ollama-api-base-url> # Defaults to http://localhost:11434
|
||||
```
|
||||
|
||||
And run PrivateGPT loading that profile you just created:
|
||||
|
||||
`PGPT_PROFILES=ollama make run`
|
||||
|
||||
or
|
||||
|
||||
`PGPT_PROFILES=ollama poetry run python -m private_gpt`
|
||||
|
||||
When the server is started it will print a log *Application startup complete*.
|
||||
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
|
||||
|
@ -80,3 +80,10 @@ class LLMComponent:
|
||||
)
|
||||
case "mock":
|
||||
self.llm = MockLLM()
|
||||
case "ollama":
|
||||
from llama_index.llms import Ollama
|
||||
|
||||
ollama_settings = settings.ollama
|
||||
self.llm = Ollama(
|
||||
model=ollama_settings.model, base_url=ollama_settings.api_base
|
||||
)
|
||||
|
@ -81,7 +81,7 @@ class DataSettings(BaseModel):
|
||||
|
||||
|
||||
class LLMSettings(BaseModel):
|
||||
mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
|
||||
mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
|
||||
max_new_tokens: int = Field(
|
||||
256,
|
||||
description="The maximum number of token that the LLM is authorized to generate in one completion.",
|
||||
@ -168,6 +168,17 @@ class OpenAISettings(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
class OllamaSettings(BaseModel):
    """Configuration for connecting to an Ollama server as the LLM backend."""

    # Base URL where the Ollama HTTP API is reachable.
    # Fixed: the example previously showed "https://..." while the default
    # scheme is plain "http://" — the example now matches the default.
    api_base: str = Field(
        "http://localhost:11434",
        description="Base URL of Ollama API. Example: 'http://localhost:11434'.",
    )
    # Name of the Ollama model to run. The model must already be pulled onto
    # the Ollama server (see https://ollama.ai/library).
    # NOTE(review): annotated `str` but defaults to None, which makes the field
    # effectively optional with a non-string default — consider making it
    # required (no default) or typing it `str | None`; confirm against callers.
    model: str = Field(
        None,
        description="Model to use. Example: 'llama2-uncensored'.",
    )
|
||||
|
||||
|
||||
class UISettings(BaseModel):
|
||||
enabled: bool
|
||||
path: str
|
||||
@ -243,6 +254,7 @@ class Settings(BaseModel):
|
||||
local: LocalSettings
|
||||
sagemaker: SagemakerSettings
|
||||
openai: OpenAISettings
|
||||
ollama: OllamaSettings
|
||||
vectorstore: VectorstoreSettings
|
||||
qdrant: QdrantSettings | None = None
|
||||
|
||||
|
@ -63,3 +63,6 @@ sagemaker:
|
||||
openai:
|
||||
api_key: ${OPENAI_API_KEY:}
|
||||
model: gpt-3.5-turbo
|
||||
|
||||
ollama:
|
||||
model: llama2-uncensored
|
||||
|
Loading…
Reference in New Issue
Block a user