Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-07-28 14:27:20 +00:00)

feat(model): support ollama as an optional llm & embedding proxy (#1475)

Signed-off-by: shanhaikang.shk <shanhaikang.shk@oceanbase.com>
Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>

parent 0f8188b152
commit 744b3e4933
@@ -100,3 +100,6 @@ ignore_missing_imports = True
 [mypy-rich.*]
 ignore_missing_imports = True
 
+
+[mypy-ollama.*]
+ignore_missing_imports = True

@@ -69,6 +69,7 @@ LLM_MODEL_CONFIG = {
     "yi_proxyllm": "yi_proxyllm",
     # https://platform.moonshot.cn/docs/
     "moonshot_proxyllm": "moonshot_proxyllm",
+    "ollama_proxyllm": "ollama_proxyllm",
     "llama-2-7b": os.path.join(MODEL_PATH, "Llama-2-7b-chat-hf"),
     "llama-2-13b": os.path.join(MODEL_PATH, "Llama-2-13b-chat-hf"),
     "llama-2-70b": os.path.join(MODEL_PATH, "Llama-2-70b-chat-hf"),

@@ -200,6 +201,7 @@ EMBEDDING_MODEL_CONFIG = {
     "proxy_azure": "proxy_azure",
     # Common HTTP embedding model
     "proxy_http_openapi": "proxy_http_openapi",
+    "proxy_ollama": "proxy_ollama",
 }
 
 
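With these two registry entries, "ollama_proxyllm" and "proxy_ollama" become selectable model names (typically chosen via LLM_MODEL and EMBEDDING_MODEL in the .env file). A minimal sketch of what the new entries resolve to; the import path is an assumption based on the usual DB-GPT layout:

from dbgpt.configs.model_config import (  # module path assumed
    EMBEDDING_MODEL_CONFIG,
    LLM_MODEL_CONFIG,
)

# Proxy models map a logical name to itself rather than to a local model path.
assert LLM_MODEL_CONFIG["ollama_proxyllm"] == "ollama_proxyllm"
assert EMBEDDING_MODEL_CONFIG["proxy_ollama"] == "proxy_ollama"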
@@ -50,6 +50,16 @@ class EmbeddingLoader:
             if proxy_param.proxy_backend:
                 openapi_param["model_name"] = proxy_param.proxy_backend
             return OpenAPIEmbeddings(**openapi_param)
+        elif model_name in ["proxy_ollama"]:
+            from dbgpt.rag.embedding import OllamaEmbeddings
+
+            proxy_param = cast(ProxyEmbeddingParameters, param)
+            ollama_param = {}
+            if proxy_param.proxy_server_url:
+                ollama_param["api_url"] = proxy_param.proxy_server_url
+            if proxy_param.proxy_backend:
+                ollama_param["model_name"] = proxy_param.proxy_backend
+            return OllamaEmbeddings(**ollama_param)
         else:
             from dbgpt.rag.embedding import HuggingFaceEmbeddings
 
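In effect this branch forwards the proxy parameters straight into the new embeddings class: proxy_server_url becomes api_url and proxy_backend becomes model_name. A minimal sketch of the equivalent direct construction (URL and model tag are illustrative assumptions):

from dbgpt.rag.embedding import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    api_url="http://localhost:11434",  # assumed: local Ollama server
    model_name="llama2",               # assumed: any model already pulled with `ollama pull`
)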
@@ -114,6 +114,23 @@ class TongyiProxyLLMModelAdapter(ProxyLLMModelAdapter):
         return tongyi_generate_stream
 
 
+class OllamaLLMModelAdapter(ProxyLLMModelAdapter):
+    def do_match(self, lower_model_name_or_path: Optional[str] = None):
+        return lower_model_name_or_path == "ollama_proxyllm"
+
+    def get_llm_client_class(
+        self, params: ProxyModelParameters
+    ) -> Type[ProxyLLMClient]:
+        from dbgpt.model.proxy.llms.ollama import OllamaLLMClient
+
+        return OllamaLLMClient
+
+    def get_generate_stream_function(self, model, model_path: str):
+        from dbgpt.model.proxy.llms.ollama import ollama_generate_stream
+
+        return ollama_generate_stream
+
+
 class ZhipuProxyLLMModelAdapter(ProxyLLMModelAdapter):
     support_system_message = False
 

@@ -279,6 +296,7 @@ class MoonshotProxyLLMModelAdapter(ProxyLLMModelAdapter):
 
 register_model_adapter(OpenAIProxyLLMModelAdapter)
 register_model_adapter(TongyiProxyLLMModelAdapter)
+register_model_adapter(OllamaLLMModelAdapter)
 register_model_adapter(ZhipuProxyLLMModelAdapter)
 register_model_adapter(WenxinProxyLLMModelAdapter)
 register_model_adapter(GeminiProxyLLMModelAdapter)
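The adapter is matched purely on the lowercased model name, so only "ollama_proxyllm" routes to the new client. A small illustrative check (the adapter's module path is assumed here):

from dbgpt.model.adapter.proxy_adapter import OllamaLLMModelAdapter  # path assumed

adapter = OllamaLLMModelAdapter()
assert adapter.do_match("ollama_proxyllm") is True
assert adapter.do_match("chatgpt_proxyllm") is False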
@@ -556,7 +556,7 @@ class ProxyEmbeddingParameters(BaseEmbeddingModelParameters):
 
 
 _EMBEDDING_PARAMETER_CLASS_TO_NAME_CONFIG = {
-    ProxyEmbeddingParameters: "proxy_openai,proxy_azure,proxy_http_openapi",
+    ProxyEmbeddingParameters: "proxy_openai,proxy_azure,proxy_http_openapi,proxy_ollama",
 }
 
 EMBEDDING_NAME_TO_PARAMETER_CLASS_CONFIG = {}
@@ -11,6 +11,7 @@ def __lazy_import(name):
         "ZhipuLLMClient": "dbgpt.model.proxy.llms.zhipu",
         "YiLLMClient": "dbgpt.model.proxy.llms.yi",
         "MoonshotLLMClient": "dbgpt.model.proxy.llms.moonshot",
+        "OllamaLLMClient": "dbgpt.model.proxy.llms.ollama",
     }
 
     if name in module_path:

@@ -33,4 +34,5 @@ __all__ = [
     "SparkLLMClient",
     "YiLLMClient",
     "MoonshotLLMClient",
+    "OllamaLLMClient",
 ]
dbgpt/model/proxy/llms/ollama.py (new file, 101 lines)

@@ -0,0 +1,101 @@
+import logging
+from concurrent.futures import Executor
+from typing import Iterator, Optional
+
+from dbgpt.core import MessageConverter, ModelOutput, ModelRequest, ModelRequestContext
+from dbgpt.model.parameter import ProxyModelParameters
+from dbgpt.model.proxy.base import ProxyLLMClient
+from dbgpt.model.proxy.llms.proxy_model import ProxyModel
+
+logger = logging.getLogger(__name__)
+
+
+def ollama_generate_stream(
+    model: ProxyModel, tokenizer, params, device, context_len=4096
+):
+    client: OllamaLLMClient = model.proxy_llm_client
+    context = ModelRequestContext(stream=True, user_name=params.get("user_name"))
+    request = ModelRequest.build_request(
+        client.default_model,
+        messages=params["messages"],
+        temperature=params.get("temperature"),
+        context=context,
+        max_new_tokens=params.get("max_new_tokens"),
+    )
+    for r in client.sync_generate_stream(request):
+        yield r
+
+
+class OllamaLLMClient(ProxyLLMClient):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        model_alias: Optional[str] = "ollama_proxyllm",
+        context_length: Optional[int] = 4096,
+        executor: Optional[Executor] = None,
+    ):
+        if not model:
+            model = "llama2"
+        if not host:
+            host = "http://localhost:11434"
+        self._model = model
+        self._host = host
+
+        super().__init__(
+            model_names=[model, model_alias],
+            context_length=context_length,
+            executor=executor,
+        )
+
+    @classmethod
+    def new_client(
+        cls,
+        model_params: ProxyModelParameters,
+        default_executor: Optional[Executor] = None,
+    ) -> "OllamaLLMClient":
+        return cls(
+            model=model_params.proxyllm_backend,
+            host=model_params.proxy_server_url,
+            model_alias=model_params.model_name,
+            context_length=model_params.max_context_size,
+            executor=default_executor,
+        )
+
+    @property
+    def default_model(self) -> str:
+        return self._model
+
+    def sync_generate_stream(
+        self,
+        request: ModelRequest,
+        message_converter: Optional[MessageConverter] = None,
+    ) -> Iterator[ModelOutput]:
+        try:
+            import ollama
+            from ollama import Client
+        except ImportError as e:
+            raise ValueError(
+                "Could not import python package: ollama. "
+                "Please install ollama with the command `pip install ollama`"
+            ) from e
+        request = self.local_covert_message(request, message_converter)
+        messages = request.to_common_messages()
+
+        model = request.model or self._model
+        client = Client(self._host)
+        try:
+            stream = client.chat(
+                model=model,
+                messages=messages,
+                stream=True,
+            )
+            content = ""
+            for chunk in stream:
+                content = content + chunk["message"]["content"]
+                yield ModelOutput(text=content, error_code=0)
+        except ollama.ResponseError as e:
+            yield ModelOutput(
+                text=f"**Ollama Response Error, Please Check Error Info.**: {e}",
+                error_code=-1,
+            )
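The client can also be exercised on its own against a local Ollama server. A hedged sketch; the ModelMessage-based request construction is an assumption about the surrounding dbgpt.core API, and the model must already be pulled:

from dbgpt.core import ModelMessage, ModelMessageRoleType, ModelRequest  # exports assumed
from dbgpt.model.proxy.llms.ollama import OllamaLLMClient

# Assumes `ollama serve` is running locally and `ollama pull llama2` was done.
client = OllamaLLMClient(model="llama2", host="http://localhost:11434")

request = ModelRequest.build_request(
    client.default_model,
    messages=[ModelMessage(role=ModelMessageRoleType.HUMAN, content="Say hello.")],
    temperature=0.7,
)

# sync_generate_stream yields cumulative text, so the last output holds the full reply.
output = None
for output in client.sync_generate_stream(request):
    pass
if output is not None:
    print(output.text)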
@@ -12,6 +12,7 @@ from .embeddings import ( # noqa: F401
     HuggingFaceInferenceAPIEmbeddings,
     HuggingFaceInstructEmbeddings,
     JinaEmbeddings,
+    OllamaEmbeddings,
     OpenAPIEmbeddings,
 )
 

@@ -23,6 +24,7 @@ __ALL__ = [
     "HuggingFaceInstructEmbeddings",
     "JinaEmbeddings",
     "OpenAPIEmbeddings",
+    "OllamaEmbeddings",
     "DefaultEmbeddingFactory",
     "EmbeddingFactory",
     "WrappedEmbeddingFactory",
@@ -736,3 +736,94 @@ class OpenAPIEmbeddings(BaseModel, Embeddings):
         """Asynchronous Embed query text."""
         embeddings = await self.aembed_documents([text])
         return embeddings[0]
+
+
+class OllamaEmbeddings(BaseModel, Embeddings):
+    """Ollama proxy embeddings.
+
+    This class is used to get embeddings for a list of texts using the Ollama API.
+    It requires a proxy server url `api_url` and a model name `model_name`.
+    The default model name is "llama2".
+    """
+
+    api_url: str = Field(
+        default="http://localhost:11434",
+        description="The URL of the embeddings API.",
+    )
+    model_name: str = Field(
+        default="llama2", description="The name of the model to use."
+    )
+
+    def __init__(self, **kwargs):
+        """Initialize the OllamaEmbeddings."""
+        super().__init__(**kwargs)
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Get the embeddings for a list of texts.
+
+        Args:
+            texts (Documents): A list of texts to get embeddings for.
+
+        Returns:
+            Embedded texts as List[List[float]], where each inner List[float]
+                corresponds to a single input text.
+        """
+        return [self.embed_query(text) for text in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using the Ollama embedding model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        try:
+            import ollama
+            from ollama import Client
+        except ImportError as e:
+            raise ValueError(
+                "Could not import python package: ollama. "
+                "Please install ollama with the command `pip install ollama`"
+            ) from e
+        try:
+            return (
+                Client(self.api_url).embeddings(model=self.model_name, prompt=text)
+            )["embedding"]
+        except ollama.ResponseError as e:
+            raise ValueError(f"**Ollama Response Error, Please Check Error Info.**: {e}")
+
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Asynchronous Embed search docs.
+
+        Args:
+            texts: A list of texts to get embeddings for.
+
+        Returns:
+            List[List[float]]: Embedded texts as List[List[float]], where each inner
+                List[float] corresponds to a single input text.
+        """
+        embeddings = []
+        for text in texts:
+            embedding = await self.aembed_query(text)
+            embeddings.append(embedding)
+        return embeddings
+
+    async def aembed_query(self, text: str) -> List[float]:
+        """Asynchronous Embed query text."""
+        try:
+            import ollama
+            from ollama import AsyncClient
+        except ImportError:
+            raise ValueError(
+                "The ollama python package is not installed. "
+                "Please install it with `pip install ollama`"
+            )
+        try:
+            embedding = await AsyncClient(host=self.api_url).embeddings(
+                model=self.model_name, prompt=text
+            )
+            return embedding["embedding"]
+        except ollama.ResponseError as e:
+            raise ValueError(f"**Ollama Response Error, Please Check Error Info.**: {e}")
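A quick way to sanity-check the new embeddings class against a running Ollama instance, covering both the synchronous and asynchronous paths (URL and model tag are illustrative assumptions):

import asyncio

from dbgpt.rag.embedding import OllamaEmbeddings

emb = OllamaEmbeddings(api_url="http://localhost:11434", model_name="llama2")  # assumed setup

# Synchronous path: embed_documents calls embed_query once per text.
vectors = emb.embed_documents(["hello world", "DB-GPT with Ollama"])
print(len(vectors), len(vectors[0]))

# Asynchronous path goes through ollama.AsyncClient.
query_vector = asyncio.run(emb.aembed_query("hello world"))
print(len(query_vector))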
setup.py (1 line added)

@@ -658,6 +658,7 @@ def default_requires():
         "dashscope",
         "chardet",
         "sentencepiece",
+        "ollama",
     ]
     setup_spec.extras["default"] += setup_spec.extras["framework"]
     setup_spec.extras["default"] += setup_spec.extras["rag"]