From 845432fea0fc8a40b417beaa73024cbad8f9b1b2 Mon Sep 17 00:00:00 2001
From: Dmitry
Date: Tue, 15 Jul 2025 07:17:14 +0200
Subject: [PATCH] feat(model): AI/ML API integration (#2844)

---
 configs/dbgpt-proxy-aimlapi.toml              |  29 ++
 docker/base/Dockerfile                        |   5 +-
 ...pi_aimlapideploymodelparameters_a1b2c3.mdx |  34 +++
 docs/docs/config-reference/llm/index.mdx      |   5 +
 docs/docs/installation/docker.md              |   9 +-
 docs/docs/installation/docker_compose.md      |   7 +-
 .../integrations/aimlapi_llm_install.md       |  24 ++
 .../src/dbgpt/model/proxy/__init__.py         |   3 +
 .../src/dbgpt/model/proxy/llms/aimlapi.py     | 258 ++++++++++++++++++
 .../src/dbgpt_ext/rag/embeddings/__init__.py  |   2 +
 .../src/dbgpt_ext/rag/embeddings/aimlapi.py   | 173 ++++++++++++
 11 files changed, 545 insertions(+), 4 deletions(-)
 create mode 100644 configs/dbgpt-proxy-aimlapi.toml
 create mode 100644 docs/docs/config-reference/llm/aimlapi_aimlapideploymodelparameters_a1b2c3.mdx
 create mode 100644 docs/docs/installation/integrations/aimlapi_llm_install.md
 create mode 100644 packages/dbgpt-core/src/dbgpt/model/proxy/llms/aimlapi.py
 create mode 100644 packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/aimlapi.py

diff --git a/configs/dbgpt-proxy-aimlapi.toml b/configs/dbgpt-proxy-aimlapi.toml
new file mode 100644
index 000000000..bdcdefbf1
--- /dev/null
+++ b/configs/dbgpt-proxy-aimlapi.toml
@@ -0,0 +1,29 @@
+[system]
+language = "${env:DBGPT_LANG:-en}"
+api_keys = []
+encrypt_key = "your_secret_key"
+
+[service.web]
+host = "0.0.0.0"
+port = 5670
+
+[service.web.database]
+type = "sqlite"
+path = "pilot/meta_data/dbgpt.db"
+
+[rag.storage]
+[rag.storage.vector]
+type = "chroma"
+persist_path = "pilot/data"
+
+[models]
+[[models.llms]]
+name = "${env:LLM_MODEL_NAME:-gpt-4o}"
+provider = "proxy/aimlapi"
+api_key = "${env:AIMLAPI_API_KEY}"
+
+[[models.embeddings]]
+name = "${env:EMBEDDING_MODEL_NAME:-text-embedding-3-small}"
+provider = "proxy/aimlapi"
+api_url = "https://api.aimlapi.com/v1/embeddings"
+api_key = "${env:AIMLAPI_API_KEY}"
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 7465c1c58..c99a857f6 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -123,4 +123,7 @@ RUN sed -i "s|/app/\.venv|${FINAL_VENV_NAME}|g" /${FINAL_VENV_NAME}/bin/activate
 ENV PATH="${FINAL_VENV_NAME}/bin:$PATH" \
     VIRTUAL_ENV="${FINAL_VENV_NAME}"
 # Default command
-CMD ["dbgpt", "start", "webserver", "--config", "configs/dbgpt-proxy-siliconflow.toml"]
\ No newline at end of file
+CMD ["dbgpt", "start", "webserver", "--config", "configs/dbgpt-proxy-siliconflow.toml"]
+
+# Uncomment the following line to use the AI/ML API configuration
+# CMD ["dbgpt", "start", "webserver", "--config", "configs/dbgpt-proxy-aimlapi.toml"]
diff --git a/docs/docs/config-reference/llm/aimlapi_aimlapideploymodelparameters_a1b2c3.mdx b/docs/docs/config-reference/llm/aimlapi_aimlapideploymodelparameters_a1b2c3.mdx
new file mode 100644
index 000000000..76dc3e022
--- /dev/null
+++ b/docs/docs/config-reference/llm/aimlapi_aimlapideploymodelparameters_a1b2c3.mdx
@@ -0,0 +1,34 @@
+---
+title: "AI/ML API Proxy LLM Configuration"
+description: "AI/ML API proxy LLM configuration."
+---
+
+import { ConfigDetail } from "@site/src/components/mdx/ConfigDetail";
+
+
diff --git a/docs/docs/config-reference/llm/index.mdx b/docs/docs/config-reference/llm/index.mdx
index fd7b58820..213bb9df1 100644
--- a/docs/docs/config-reference/llm/index.mdx
+++ b/docs/docs/config-reference/llm/index.mdx
@@ -82,6 +82,11 @@ import { ConfigClassTable } from '@site/src/components/mdx/ConfigClassTable';
     "description": "OpenAI Compatible Proxy LLM",
     "link": "./chatgpt_openaicompatibledeploymodelparameters_c3d426"
   },
+  {
+    "name": "AimlapiDeployModelParameters",
+    "description": "AI/ML API proxy LLM configuration.",
+    "link": "./aimlapi_aimlapideploymodelparameters_a1b2c3"
+  },
   {
     "name": "SiliconFlowDeployModelParameters",
     "description": "SiliconFlow proxy LLM configuration.",
diff --git a/docs/docs/installation/docker.md b/docs/docs/installation/docker.md
index 221918a61..697a26edf 100644
--- a/docs/docs/installation/docker.md
+++ b/docs/docs/installation/docker.md
@@ -24,15 +24,20 @@ docker pull eosphorosai/dbgpt-openai:latest
 
 2. Run the Docker container
 
-This example requires you previde a valid API key for the SiliconFlow API. You can obtain one by signing up at [SiliconFlow](https://siliconflow.cn/) and creating an API key at [API Key](https://cloud.siliconflow.cn/account/ak).
+This example requires you to provide a valid API key for the SiliconFlow API. You can obtain one by signing up at [SiliconFlow](https://siliconflow.cn/) and creating an API key at [API Key](https://cloud.siliconflow.cn/account/ak). Alternatively, set `AIMLAPI_API_KEY` to use the AI/ML API service.
 
 ```bash
 docker run -it --rm -e SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY} \
  -p 5670:5670 --name dbgpt eosphorosai/dbgpt-openai
 ```
+Or with AI/ML API:
+```bash
+docker run -it --rm -e AIMLAPI_API_KEY=${AIMLAPI_API_KEY} \
+ -p 5670:5670 --name dbgpt eosphorosai/dbgpt-openai
+```
 
-Please replace `${SILICONFLOW_API_KEY}` with your own API key.
+Please replace `${SILICONFLOW_API_KEY}` or `${AIMLAPI_API_KEY}` with your own API key.
 
 Then you can visit [http://localhost:5670](http://localhost:5670) in the browser.
diff --git a/docs/docs/installation/docker_compose.md b/docs/docs/installation/docker_compose.md
index 25503e4a3..ae9825a9f 100644
--- a/docs/docs/installation/docker_compose.md
+++ b/docs/docs/installation/docker_compose.md
@@ -2,12 +2,17 @@
 
 ## Run via Docker-Compose
 
-This example requires you previde a valid API key for the SiliconFlow API. You can obtain one by signing up at [SiliconFlow](https://siliconflow.cn/) and creating an API key at [API Key](https://cloud.siliconflow.cn/account/ak).
+This example requires you to provide a valid API key for the SiliconFlow API. You can obtain one by signing up at [SiliconFlow](https://siliconflow.cn/) and creating an API key at [API Key](https://cloud.siliconflow.cn/account/ak).
+Alternatively, you can use the [AI/ML API](https://aimlapi.com/) by setting `AIMLAPI_API_KEY`.
 
 ```bash
 SILICONFLOW_API_KEY=${SILICONFLOW_API_KEY} docker compose up -d
 ```
+Or use AI/ML API:
+```bash
+AIMLAPI_API_KEY=${AIMLAPI_API_KEY} docker compose up -d
+```
 
 You will see the following output if the deployment is successful.
 ```bash
diff --git a/docs/docs/installation/integrations/aimlapi_llm_install.md b/docs/docs/installation/integrations/aimlapi_llm_install.md
new file mode 100644
index 000000000..5ff34817b
--- /dev/null
+++ b/docs/docs/installation/integrations/aimlapi_llm_install.md
@@ -0,0 +1,24 @@
+# AI/ML API
+
+AI/ML API provides 300+ AI models, including DeepSeek, Gemini, and ChatGPT. The models run at enterprise-grade rate limits and uptime.
+
+This section describes how to use the AI/ML API provider with DB-GPT.
+
+1. Sign up at [AI/ML API](https://aimlapi.com/app/?utm_source=db_gpt&utm_medium=github&utm_campaign=integration) and generate an API key.
+2. Set the environment variable `AIMLAPI_API_KEY` to your key.
+3. Use the `configs/dbgpt-proxy-aimlapi.toml` configuration when starting DB-GPT.
+
+You can look up available models at [https://aimlapi.com/models/](https://aimlapi.com/models/?utm_source=db_gpt&utm_medium=github&utm_campaign=integration).
+
+Alternatively, you can use `docker/base/Dockerfile` to run DB-GPT with the AI/ML API:
+
+```dockerfile
+# Expose the port for the web server, if you want to run it directly from the Dockerfile
+EXPOSE 5670
+
+# Set the environment variable for the AIMLAPI API key
+ENV AIMLAPI_API_KEY="***"
+
+# Just uncomment the following line in the `Dockerfile` to use AI/ML API:
+CMD ["dbgpt", "start", "webserver", "--config", "configs/dbgpt-proxy-aimlapi.toml"]
+```
diff --git a/packages/dbgpt-core/src/dbgpt/model/proxy/__init__.py b/packages/dbgpt-core/src/dbgpt/model/proxy/__init__.py
index 8db369c84..938d2f6d6 100644
--- a/packages/dbgpt-core/src/dbgpt/model/proxy/__init__.py
+++ b/packages/dbgpt-core/src/dbgpt/model/proxy/__init__.py
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
+    from dbgpt.model.proxy.llms.aimlapi import AimlapiLLMClient
     from dbgpt.model.proxy.llms.chatgpt import OpenAILLMClient
     from dbgpt.model.proxy.llms.claude import ClaudeLLMClient
     from dbgpt.model.proxy.llms.deepseek import DeepseekLLMClient
@@ -24,6 +25,7 @@ def __lazy_import(name):
     "OpenAILLMClient": "dbgpt.model.proxy.llms.chatgpt",
     "ClaudeLLMClient": "dbgpt.model.proxy.llms.claude",
     "GeminiLLMClient": "dbgpt.model.proxy.llms.gemini",
+    "AimlapiLLMClient": "dbgpt.model.proxy.llms.aimlapi",
     "SiliconFlowLLMClient": "dbgpt.model.proxy.llms.siliconflow",
     "SparkLLMClient": "dbgpt.model.proxy.llms.spark",
     "TongyiLLMClient": "dbgpt.model.proxy.llms.tongyi",
@@ -55,6 +57,7 @@ __all__ = [
     "TongyiLLMClient",
     "ZhipuLLMClient",
     "WenxinLLMClient",
+    "AimlapiLLMClient",
     "SiliconFlowLLMClient",
     "SparkLLMClient",
     "YiLLMClient",
diff --git a/packages/dbgpt-core/src/dbgpt/model/proxy/llms/aimlapi.py b/packages/dbgpt-core/src/dbgpt/model/proxy/llms/aimlapi.py
new file mode 100644
index 000000000..893b1e615
--- /dev/null
+++ b/packages/dbgpt-core/src/dbgpt/model/proxy/llms/aimlapi.py
@@ -0,0 +1,258 @@
+import os
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Dict, Optional, Type, Union
+
+from dbgpt.core import ModelMetadata
+from dbgpt.core.awel.flow import (
+    TAGS_ORDER_HIGH,
+    ResourceCategory,
+    auto_register_resource,
+)
+from dbgpt.model.proxy.llms.proxy_model import ProxyModel, parse_model_request
+from dbgpt.util.i18n_utils import _
+
+from ..base import (
+    AsyncGenerateStreamFunction,
+    GenerateStreamFunction,
+    register_proxy_model_adapter,
+)
+from .chatgpt import OpenAICompatibleDeployModelParameters, OpenAILLMClient
+
+AIMLAPI_HEADERS = {
+    "HTTP-Referer": "https://github.com/eosphoros-ai/DB-GPT",
+    "X-Title": "DB GPT",
+}
+
+if TYPE_CHECKING:
+    from httpx._types import ProxiesTypes
+    from openai import AsyncAzureOpenAI, AsyncOpenAI
+
+    ClientType = Union[AsyncAzureOpenAI, AsyncOpenAI]
+
+
+_AIMLAPI_DEFAULT_MODEL = "gpt-4o"
+
+
+@auto_register_resource(
+    label=_("AI/ML API Proxy LLM"),
+    category=ResourceCategory.LLM_CLIENT,
+    tags={"order": TAGS_ORDER_HIGH},
+    description=_("AI/ML API proxy LLM configuration."),
+    documentation_url="https://api.aimlapi.com/v1/",
+    show_in_ui=False,
+)
+@dataclass
+class AimlapiDeployModelParameters(OpenAICompatibleDeployModelParameters):
+    """Deploy model parameters for AI/ML API."""
+
+    provider: str = "proxy/aimlapi"
+
+    api_base: Optional[str] = field(
+        default="${env:AIMLAPI_API_BASE:-https://api.aimlapi.com/v1}",
+        metadata={"help": _("The base url of the AI/ML API.")},
+    )
+
+    api_key: Optional[str] = field(
+        default="${env:AIMLAPI_API_KEY}",
+        metadata={"help": _("The API key of the AI/ML API."), "tags": "privacy"},
+    )
+
+
+async def aimlapi_generate_stream(
+    model: ProxyModel, tokenizer, params, device, context_len=2048
+):
+    client: AimlapiLLMClient = model.proxy_llm_client
+    request = parse_model_request(params, client.default_model, stream=True)
+    async for r in client.generate_stream(request):
+        yield r
+
+
+class AimlapiLLMClient(OpenAILLMClient):
+    """AI/ML API LLM Client using OpenAI-compatible endpoints."""
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_type: Optional[str] = None,
+        api_version: Optional[str] = None,
+        model: Optional[str] = _AIMLAPI_DEFAULT_MODEL,
+        proxies: Optional["ProxiesTypes"] = None,
+        timeout: Optional[int] = 240,
+        model_alias: Optional[str] = _AIMLAPI_DEFAULT_MODEL,
+        context_length: Optional[int] = None,
+        openai_client: Optional["ClientType"] = None,
+        openai_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ):
+        api_base = (
+            api_base or os.getenv("AIMLAPI_API_BASE") or "https://api.aimlapi.com/v1"
+        )
+        api_key = api_key or os.getenv("AIMLAPI_API_KEY")
+        model = model or _AIMLAPI_DEFAULT_MODEL
+        if not context_length:
+            if "200k" in model:
+                context_length = 200 * 1024
+            else:
+                context_length = 4096
+
+        if not api_key:
+            raise ValueError(
+                "AI/ML API key is required, please set 'AIMLAPI_API_KEY' "
+                "in environment or pass it as an argument."
+            )
+
+        super().__init__(
+            api_key=api_key,
+            api_base=api_base,
+            api_type=api_type,
+            api_version=api_version,
+            model=model,
+            proxies=proxies,
+            timeout=timeout,
+            model_alias=model_alias,
+            context_length=context_length,
+            openai_client=openai_client,
+            openai_kwargs=openai_kwargs,
+            **kwargs,
+        )
+        try:
+            self.client.default_headers.update(AIMLAPI_HEADERS)
+        except Exception:
+            pass
+
+    @property
+    def default_model(self) -> str:
+        model = self._model
+        if not model:
+            model = _AIMLAPI_DEFAULT_MODEL
+        return model
+
+    @classmethod
+    def param_class(cls) -> Type[AimlapiDeployModelParameters]:
+        return AimlapiDeployModelParameters
+
+    @classmethod
+    def generate_stream_function(
+        cls,
+    ) -> Optional[Union[GenerateStreamFunction, AsyncGenerateStreamFunction]]:
+        return aimlapi_generate_stream
+
+
+register_proxy_model_adapter(
+    AimlapiLLMClient,
+    supported_models=[
+        ModelMetadata(
+            model=["openai/gpt-4"],
+            context_length=8_000,
+            max_output_length=4_096,
+            description="OpenAI GPT‑4: state‑of‑the‑art language model",
+            link="https://aimlapi.com/models/chat-gpt-4",
+            function_calling=True,
+        ),
+        ModelMetadata(
+            model=["openai/gpt-4o", "gpt-4o-mini", "openai/gpt-4-turbo"],
+            context_length=128_000,
+            max_output_length=16_384,
+            description="GPT‑4 family (4o, 4o‑mini, 4 Turbo) via AI/ML API",
+            link="https://aimlapi.com/models#openai-gpt-4o",
+            function_calling=True,
+        ),
+        ModelMetadata(
+            model=["gpt-3.5-turbo"],
+            context_length=16_000,
+            max_output_length=4_096,
+            description="GPT‑3.5 Turbo: fast, high‑quality text generation",
+            link="https://aimlapi.com/models/chat-gpt-3-5-turbo",
+            function_calling=True,
+        ),
+        ModelMetadata(
+            model=[
+                "mistralai/Mistral-7B-Instruct-v0.3",
+                "meta-llama/Llama-3.1-405B",
+                "Qwen/Qwen2-235B",
+            ],
+            context_length=32_000,
+            max_output_length=8_192,
+            description="Instruction‑tuned LLMs with 32k token context window",
+            link="https://aimlapi.com/models",
+            function_calling=False,
+        ),
+        ModelMetadata(
+            model=[
+                "google/gemini-2-27b-it",
+                "x-ai/grok-2-beta",
+                "bytedance/seedream-3.0",
+            ],
+            context_length=8_000,
+            max_output_length=4_096,
+            description="Models with 8k token context window, no function_calling",
+            link="https://aimlapi.com/models",
+            function_calling=False,
+        ),
+        ModelMetadata(
+            model=["claude-3-5-sonnet-20240620"],
+            context_length=8_192,
+            max_output_length=2_048,
+            description="Claude 3.5 Sonnet: advanced multimodal model from Anthropic",
+            link="https://aimlapi.com/models/claude-3-5-sonnet",
+            function_calling=True,
+        ),
+        ModelMetadata(
+            model=["deepseek-chat"],
+            context_length=128_000,
+            max_output_length=16_000,
+            description="DeepSeek V3: efficient high‑performance LLM",
+            link="https://aimlapi.com/models/deepseek-v3",
+            function_calling=False,
+        ),
+        ModelMetadata(
+            model=["mistralai/Mixtral-8x7B-Instruct-v0.1"],
+            context_length=64_000,
+            max_output_length=8_000,
+            description="Mixtral‑8x7B: sparse mixture‑of‑experts instruction model",
+            link="https://aimlapi.com/models/mixtral-8x7b-instruct-v01",
+            function_calling=False,
+        ),
+        ModelMetadata(
+            model=["meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo"],
+            context_length=131_000,
+            max_output_length=16_000,
+            description="Llama 3.2‑90B: advanced vision‑instruct turbo model",
+            link="https://aimlapi.com/models/llama-3-2-90b-vision-instruct-turbo-api",
+            function_calling=False,
+        ),
+        ModelMetadata(
+            model=["google/gemini-2-0-flash"],
+            context_length=1_000_000,
+            max_output_length=32_768,
+            description="Gemini 2.0 Flash: ultra‑low latency multimodal model",
+ link="https://aimlapi.com/models/gemini-2-0-flash-api", + function_calling=True, + ), + ModelMetadata( + model=["meta-llama/Meta-Llama-3-8B-Instruct-Lite"], + context_length=9_000, + max_output_length=1_024, + description="Llama 3 8B Instruct Lite: compact dialogue model", + link="https://aimlapi.com/models/llama-3-8b-instruct-lite-api", + function_calling=False, + ), + ModelMetadata( + model=["cohere/command-r-plus"], + context_length=128_000, + max_output_length=16_000, + description="Cohere Command R+: enterprise‑grade chat model", + link="https://aimlapi.com/models/command-r-api", + function_calling=False, + ), + ModelMetadata( + model=["mistralai/codestral-2501"], + context_length=256_000, + max_output_length=32_000, + description="Codestral‑2501: advanced code generation model", + link="https://aimlapi.com/models/mistral-codestral-2501-api", + function_calling=False, + ), + ], +) diff --git a/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/__init__.py b/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/__init__.py index 1ddd5a90c..d5ce7388a 100644 --- a/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/__init__.py +++ b/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/__init__.py @@ -1,5 +1,6 @@ """Module for embedding related classes and functions.""" +from .aimlapi import AimlapiEmbeddings # noqa: F401 from .jina import JinaEmbeddings # noqa: F401 from .ollama import OllamaEmbeddings # noqa: F401 from .qianfan import QianFanEmbeddings # noqa: F401 @@ -12,4 +13,5 @@ __ALL__ = [ "QianFanEmbeddings", "TongYiEmbeddings", "SiliconFlowEmbeddings", + "AimlapiEmbeddings", ] diff --git a/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/aimlapi.py b/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/aimlapi.py new file mode 100644 index 000000000..1981fc0ec --- /dev/null +++ b/packages/dbgpt-ext/src/dbgpt_ext/rag/embeddings/aimlapi.py @@ -0,0 +1,173 @@ +"""AI/ML API embeddings for RAG.""" + +from dataclasses import dataclass, field +from typing import List, Optional, Type + +from dbgpt._private.pydantic import BaseModel, ConfigDict, Field +from dbgpt.core import EmbeddingModelMetadata, Embeddings +from dbgpt.core.interface.parameter import EmbeddingDeployModelParameters +from dbgpt.model.adapter.base import register_embedding_adapter +from dbgpt.util.i18n_utils import _ + +AIMLAPI_HEADERS = { + "HTTP-Referer": "https://github.com/eosphoros-ai/DB-GPT", + "X-Title": "DB GPT", +} + + +@dataclass +class AimlapiEmbeddingDeployModelParameters(EmbeddingDeployModelParameters): + """AI/ML API Embeddings deploy model parameters.""" + + provider: str = "proxy/aimlapi" + + api_key: Optional[str] = field( + default="${env:AIMLAPI_API_KEY}", + metadata={"help": _("The API key for the embeddings API.")}, + ) + backend: Optional[str] = field( + default="text-embedding-3-small", + metadata={ + "help": _( + "The real model name to pass to the provider, default is None. If " + "backend is None, use name as the real model name." + ), + }, + ) + + @property + def real_provider_model_name(self) -> str: + return self.backend or self.name + + +class AimlapiEmbeddings(BaseModel, Embeddings): + """The AI/ML API embeddings.""" + + model_config = ConfigDict(arbitrary_types_allowed=True, protected_namespaces=()) + api_key: Optional[str] = Field( + default=None, description="The API key for the embeddings API." + ) + model_name: str = Field( + default="text-embedding-3-small", description="The name of the model to use." 
+    )
+
+    def __init__(self, **kwargs):
+        """Initialize the AI/ML API Embeddings."""
+        super().__init__(**kwargs)
+        self._api_key = self.api_key
+
+    @classmethod
+    def param_class(cls) -> Type[AimlapiEmbeddingDeployModelParameters]:
+        return AimlapiEmbeddingDeployModelParameters
+
+    @classmethod
+    def from_parameters(
+        cls, parameters: AimlapiEmbeddingDeployModelParameters
+    ) -> "Embeddings":
+        return cls(
+            api_key=parameters.api_key, model_name=parameters.real_provider_model_name
+        )
+
+    def embed_documents(
+        self, texts: List[str], max_batch_chunks_size: int = 25
+    ) -> List[List[float]]:
+        """Get the embeddings for a list of texts."""
+        import requests
+
+        embeddings = []
+        headers = {"Authorization": f"Bearer {self._api_key}"}
+        headers.update(AIMLAPI_HEADERS)
+
+        for i in range(0, len(texts), max_batch_chunks_size):
+            batch_texts = texts[i : i + max_batch_chunks_size]
+            response = requests.post(
+                url="https://api.aimlapi.com/v1/embeddings",
+                json={"model": self.model_name, "input": batch_texts},
+                headers=headers,
+            )
+            if response.status_code != 200:
+                raise RuntimeError(f"Embedding failed: {response.text}")
+            data = response.json()
+            batch_embeddings = data["data"]
+            sorted_embeddings = sorted(batch_embeddings, key=lambda e: e["index"])
+            embeddings.extend([result["embedding"] for result in sorted_embeddings])
+
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        return self.embed_documents([text])[0]
+
+
+register_embedding_adapter(
+    AimlapiEmbeddings,
+    supported_models=[
+        EmbeddingModelMetadata(
+            model=["text-embedding-3-large", "text-embedding-ada-002"],
+            dimension=1536,
+            context_length=8000,
+            description=_(
+                "High‑performance embedding models with "
+                "flexible dimensions and superior accuracy."
+            ),
+            link="https://aimlapi.com/models",
+        ),
+        EmbeddingModelMetadata(
+            model=["BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5"],
+            dimension=1536,
+            context_length=None,
+            description=_(
+                "BAAI BGE models for precise and high‑performance language embeddings."
+            ),
+            link="https://aimlapi.com/models",
+        ),
+        EmbeddingModelMetadata(
+            model=[
+                "togethercomputer/m2-bert-80M-32k-retrieval",
+                "voyage-finance-2",
+                "voyage-multilingual-2",
+            ],
+            dimension=1536,
+            context_length=32000,
+            description=_(
+                "High‑capacity embedding models with 32k token "
+                "context window for retrieval and specialized domains."
+            ),
+            link="https://aimlapi.com/models",
+        ),
+        EmbeddingModelMetadata(
+            model=[
+                "voyage-large-2-instruct",
+                "voyage-law-2",
+                "voyage-code-2",
+                "voyage-large-2",
+            ],
+            dimension=1536,
+            context_length=16000,
+            description=_(
+                "Voyage embedding models with 16k token context window, "
+                "optimized for general and instruction tasks."
+            ),
+            link="https://aimlapi.com/models",
+        ),
+        EmbeddingModelMetadata(
+            model=["voyage-2"],
+            dimension=1536,
+            context_length=4000,
+            description=_("Voyage 2: compact embeddings for smaller contexts."),
+            link="https://aimlapi.com/models",
+        ),
+        EmbeddingModelMetadata(
+            model=[
+                "textembedding-gecko@003",
+                "textembedding-gecko-multilingual@001",
+                "text-multilingual-embedding-002",
+            ],
+            dimension=1536,
+            context_length=2000,
+            description=_(
+                "Gecko and multilingual embedding models with 2k token context window."
+            ),
+            link="https://aimlapi.com/models",
+        ),
+    ],
+)
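
Below is a minimal smoke-test sketch for the two adapters this patch adds. It is a sketch, not part of the patch: it assumes DB-GPT is installed with its OpenAI client dependencies and that `AIMLAPI_API_KEY` is exported, and the model names are the illustrative defaults used in the patch itself.

```python
"""Smoke-test sketch for the AI/ML API adapters added above.

Assumes DB-GPT is installed with its OpenAI client dependencies and
AIMLAPI_API_KEY is exported; model names are illustrative.
"""
import os

from dbgpt.model.proxy import AimlapiLLMClient
from dbgpt_ext.rag.embeddings import AimlapiEmbeddings

# The LLM client falls back to AIMLAPI_API_KEY / AIMLAPI_API_BASE from the
# environment when the arguments are omitted (see AimlapiLLMClient.__init__);
# it raises a ValueError if no key is found.
llm_client = AimlapiLLMClient(model="gpt-4o")
print(llm_client.default_model)  # -> "gpt-4o"

# The embeddings class posts to https://api.aimlapi.com/v1/embeddings,
# batching up to 25 texts per request (see embed_documents above).
embeddings = AimlapiEmbeddings(
    api_key=os.getenv("AIMLAPI_API_KEY"),
    model_name="text-embedding-3-small",
)
vectors = embeddings.embed_documents(["DB-GPT", "AI/ML API"])
print(len(vectors), len(vectors[0]))
```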