refactor(ollama): add ollama config and support ollama model output (#2411)

Mirror of https://github.com/csunny/DB-GPT.git
Parent commit: 4e993a2be8
This commit: bb06e93215
@@ -7,7 +7,7 @@ encrypt_key = "your_secret_key"
 
 # Server Configurations
 [service.web]
-host = "127.0.0.1"
+host = "0.0.0.0"
 port = 5670
 
 [service.web.database]
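(This hunk switches the web service bind address from the loopback interface to 0.0.0.0, exposing it on all network interfaces; the same change is applied to a second config file later in this commit.)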
configs/dbgpt-proxy-ollama.toml (new file, +33 lines)
@@ -0,0 +1,33 @@
+[system]
+# Load language from environment variable(It is set by the hook)
+language = "${env:DBGPT_LANG:-en}"
+api_keys = []
+encrypt_key = "your_secret_key"
+
+# Server Configurations
+[service.web]
+host = "0.0.0.0"
+port = 5670
+
+[service.web.database]
+type = "sqlite"
+path = "pilot/meta_data/dbgpt.db"
+
+[rag.storage]
+[rag.storage.vector]
+type = "Chroma"
+persist_path = "pilot/data"
+
+# Model Configurations
+[models]
+[[models.llms]]
+name = "deepseek-r1:1.5b"
+provider = "proxy/ollama"
+api_base = "http://localhost:11434"
+api_key = ""
+
+[[models.embeddings]]
+name = "bge-m3:latest"
+provider = "proxy/ollama"
+api_url = "http://localhost:11434"
+api_key = ""
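As a quick sanity check of the endpoint and model name used above, a minimal sketch with the ollama Python package (assumes a local Ollama server is running and deepseek-r1:1.5b has been pulled; the prompt is illustrative):

    from ollama import Client

    client = Client(host="http://localhost:11434")  # matches api_base in the config

    # Stream a short completion from the LLM declared in [[models.llms]].
    for chunk in client.chat(
        model="deepseek-r1:1.5b",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    ):
        print(chunk["message"]["content"], end="", flush=True)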
@@ -6,7 +6,7 @@ encrypt_key = "your_secret_key"
 
 # Server Configurations
 [service.web]
-host = "127.0.0.1"
+host = "0.0.0.0"
 port = 5670
 
 [service.web.database]
@@ -297,6 +297,9 @@ class ModelRequestContext:
     request_id: Optional[str] = None
     """The request id of the model inference."""
 
+    is_reasoning_model: Optional[bool] = False
+    """Whether the model is a reasoning model."""
+
 
 @dataclass
 @PublicAPI(stability="beta")
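For context, a caller-side sketch of how the new field might be used (a minimal illustration; the import path is assumed, and only is_reasoning_model comes from this diff):

    from dbgpt.core.interface.llm import ModelRequestContext

    # Mark a request as targeting a reasoning model (e.g. deepseek-r1) so the
    # proxy client separates the "thinking" segment from the final answer.
    ctx = ModelRequestContext(is_reasoning_model=True)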
@@ -19,6 +19,10 @@ from dbgpt.model.proxy.base import (
 from dbgpt.model.proxy.llms.proxy_model import ProxyModel, parse_model_request
 from dbgpt.util.i18n_utils import _
 
+from ...utils.parse_utils import (
+    parse_chat_message,
+)
+
 logger = logging.getLogger(__name__)
 
 
@@ -120,6 +124,7 @@ class OllamaLLMClient(ProxyLLMClient):
         messages = request.to_common_messages()
 
         model = request.model or self._model
+        is_reasoning_model = getattr(request.context, "is_reasoning_model", False)
         client = Client(self._api_base)
         try:
             stream = client.chat(
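Reading the flag with getattr and a False default keeps the client backward compatible: request contexts created before this field existed simply fall back to the non-reasoning path.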
@@ -130,9 +135,12 @@ class OllamaLLMClient(ProxyLLMClient):
             content = ""
             for chunk in stream:
                 content = content + chunk["message"]["content"]
-                yield ModelOutput(text=content, error_code=0)
+                msg = parse_chat_message(content, extract_reasoning=is_reasoning_model)
+                yield ModelOutput.build(
+                    text=msg.content, thinking=msg.reasoning_content, error_code=0
+                )
         except ollama.ResponseError as e:
-            yield ModelOutput(
+            yield ModelOutput.build(
                 text=f"**Ollama Response Error, Please CheckErrorInfo.**: {e}",
                 error_code=-1,
             )
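This is the core behavioral change: each streamed snapshot is now parsed so reasoning content can be surfaced separately as "thinking". Illustratively, for models like deepseek-r1 that wrap chain-of-thought in <think> tags, the split could look like this standalone sketch (the real parse_chat_message lives in DB-GPT's parse_utils and may handle more formats):

    import re

    def split_reasoning(text: str) -> tuple[str, str]:
        """Return (content, reasoning) from a possibly <think>-tagged message.

        The (?:</think>|$) alternative tolerates a still-open tag, which
        matters when parsing partial text mid-stream.
        """
        match = re.search(r"<think>(.*?)(?:</think>|$)", text, re.DOTALL)
        if not match:
            return text, ""
        reasoning = match.group(1).strip()
        content = (text[: match.start()] + text[match.end():]).strip()
        return content, reasoning

    print(split_reasoning("<think>plan the greeting</think>Hello!"))
    # -> ('Hello!', 'plan the greeting')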
@@ -91,6 +91,7 @@ def parse_model_request(
         stream=stream,
         user_name=params.get("user_name"),
         request_id=params.get("request_id"),
+        is_reasoning_model=params.get("is_reasoning_model", False),
     )
     request = ModelRequest.build_request(
         default_model,
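For reference, a hypothetical shape of the params dict this parser consumes; only is_reasoning_model is new in this commit, and all values shown are illustrative:

    params = {
        "stream": True,
        "user_name": "alice",        # illustrative
        "request_id": "req-123",     # illustrative
        "is_reasoning_model": True,  # new; treated as False when absent
    }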