Mirror of https://github.com/csunny/DB-GPT.git
refactor(ollama): add ollama config and support ollama model output (#2411)
commit bb06e93215 (parent 4e993a2be8)
@@ -7,7 +7,7 @@ encrypt_key = "your_secret_key"

 # Server Configurations
 [service.web]
-host = "127.0.0.1"
+host = "0.0.0.0"
 port = 5670

 [service.web.database]
configs/dbgpt-proxy-ollama.toml (new file, 33 lines)
@@ -0,0 +1,33 @@
+[system]
+# Load language from environment variable(It is set by the hook)
+language = "${env:DBGPT_LANG:-en}"
+api_keys = []
+encrypt_key = "your_secret_key"
+
+# Server Configurations
+[service.web]
+host = "0.0.0.0"
+port = 5670
+
+[service.web.database]
+type = "sqlite"
+path = "pilot/meta_data/dbgpt.db"
+
+[rag.storage]
+[rag.storage.vector]
+type = "Chroma"
+persist_path = "pilot/data"
+
+# Model Configurations
+[models]
+[[models.llms]]
+name = "deepseek-r1:1.5b"
+provider = "proxy/ollama"
+api_base = "http://localhost:11434"
+api_key = ""
+
+[[models.embeddings]]
+name = "bge-m3:latest"
+provider = "proxy/ollama"
+api_url = "http://localhost:11434"
+api_key = ""
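The new config assumes a local Ollama server listening on http://localhost:11434 that already serves the two models it references. A quick pre-flight check before starting the web server might look like this (a sketch only, assuming the ollama Python package is installed; it is not part of this commit):

# Pre-flight check for configs/dbgpt-proxy-ollama.toml (sketch, not part of this commit).
# Assumes `pip install ollama` and an Ollama server on the default port.
from ollama import Client

client = Client("http://localhost:11434")
for model in ("deepseek-r1:1.5b", "bge-m3:latest"):
    client.pull(model)  # downloads the model if it is not already available locally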
@@ -6,7 +6,7 @@ encrypt_key = "your_secret_key"

 # Server Configurations
 [service.web]
-host = "127.0.0.1"
+host = "0.0.0.0"
 port = 5670

 [service.web.database]
@@ -297,6 +297,9 @@ class ModelRequestContext:
     request_id: Optional[str] = None
     """The request id of the model inference."""

+    is_reasoning_model: Optional[bool] = False
+    """Whether the model is a reasoning model."""
+

 @dataclass
 @PublicAPI(stability="beta")
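With this field, callers can mark a request as targeting a reasoning model so that proxy clients know to separate chain-of-thought from the final answer. A minimal sketch of setting it (the import path and the defaults of the other fields are assumptions, not guaranteed by this diff):

# Sketch: flag a request context as coming from a reasoning model.
# Import path assumed; only fields visible in this diff are set explicitly.
from dbgpt.core import ModelRequestContext

context = ModelRequestContext(
    request_id="req-42",
    is_reasoning_model=True,  # field added by this commit
)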
@@ -19,6 +19,10 @@ from dbgpt.model.proxy.base import (
 from dbgpt.model.proxy.llms.proxy_model import ProxyModel, parse_model_request
 from dbgpt.util.i18n_utils import _

+from ...utils.parse_utils import (
+    parse_chat_message,
+)
+
 logger = logging.getLogger(__name__)

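parse_chat_message is what splits the model's chain-of-thought from the visible answer; the real implementation lives under the module targeted by the relative import above (dbgpt.model.utils.parse_utils). The sketch below only illustrates the idea for <think>-tag style output such as deepseek-r1 produces, and is not the library code:

# Illustration only: split "<think>...</think>" reasoning from the answer text.
import re
from dataclasses import dataclass


@dataclass
class ParsedMessage:
    content: str
    reasoning_content: str


def parse_chat_message_sketch(text: str, extract_reasoning: bool = False) -> ParsedMessage:
    if not extract_reasoning:
        return ParsedMessage(content=text, reasoning_content="")
    # Tolerate an unterminated <think> block, which happens mid-stream.
    match = re.search(r"<think>(.*?)(?:</think>|$)", text, flags=re.DOTALL)
    reasoning = match.group(1).strip() if match else ""
    answer = re.sub(r"<think>.*?(?:</think>|$)", "", text, flags=re.DOTALL).strip()
    return ParsedMessage(content=answer, reasoning_content=reasoning)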
@@ -120,6 +124,7 @@ class OllamaLLMClient(ProxyLLMClient):
         messages = request.to_common_messages()

         model = request.model or self._model
+        is_reasoning_model = getattr(request.context, "is_reasoning_model", False)
         client = Client(self._api_base)
         try:
             stream = client.chat(
@@ -130,9 +135,12 @@ class OllamaLLMClient(ProxyLLMClient):
             content = ""
             for chunk in stream:
                 content = content + chunk["message"]["content"]
-                yield ModelOutput(text=content, error_code=0)
+                msg = parse_chat_message(content, extract_reasoning=is_reasoning_model)
+                yield ModelOutput.build(
+                    text=msg.content, thinking=msg.reasoning_content, error_code=0
+                )
         except ollama.ResponseError as e:
-            yield ModelOutput(
+            yield ModelOutput.build(
                 text=f"**Ollama Response Error, Please CheckErrorInfo.**: {e}",
                 error_code=-1,
             )
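Because the Ollama SDK streams incremental chunks, the client keeps accumulating the full text and re-parses it on every iteration, so the thinking/answer split stays current as tokens arrive. For reference, the raw streaming interface being wrapped (a standalone sketch assuming the ollama package and a running local server):

# Standalone sketch of the streaming pattern wrapped above (assumes `pip install ollama`).
from ollama import Client

client = Client("http://localhost:11434")
stream = client.chat(
    model="deepseek-r1:1.5b",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    stream=True,
)
content = ""
for chunk in stream:
    content += chunk["message"]["content"]  # cumulative text seen so far
    # DB-GPT re-parses `content` here and yields a ModelOutput with
    # thinking and answer separated when is_reasoning_model is set.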
@@ -91,6 +91,7 @@ def parse_model_request(
         stream=stream,
         user_name=params.get("user_name"),
         request_id=params.get("request_id"),
+        is_reasoning_model=params.get("is_reasoning_model", False),
     )
     request = ModelRequest.build_request(
         default_model,
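Anything that builds a request from a params dict can now opt in by including the flag; callers that omit it keep the old behavior since the default is False. An illustrative payload (values and any extra keys are examples, not prescribed by this diff):

# Illustrative params dict for parse_model_request; only the keys shown in the
# hunk above are guaranteed to be read by this code path.
params = {
    "user_name": "alice",
    "request_id": "req-42",
    "is_reasoning_model": True,  # enables reasoning extraction in the Ollama client
}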