diff --git a/docs/docs/integrations/llms/octoai.ipynb b/docs/docs/integrations/llms/octoai.ipynb
index aceeee284c5..589880f293f 100644
--- a/docs/docs/integrations/llms/octoai.ipynb
+++ b/docs/docs/integrations/llms/octoai.ipynb
@@ -26,19 +26,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
     "\n",
     "os.environ[\"OCTOAI_API_TOKEN\"] = \"OCTOAI_API_TOKEN\"\n",
-    "os.environ[\"ENDPOINT_URL\"] = \"https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate\""
+    "os.environ[\"ENDPOINT_URL\"] = \"https://text.octoai.run/v1/chat/completions\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -56,7 +56,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,36 +66,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "llm = OctoAIEndpoint(\n",
     "    model_kwargs={\n",
-    "        \"max_new_tokens\": 200,\n",
-    "        \"temperature\": 0.75,\n",
-    "        \"top_p\": 0.95,\n",
-    "        \"repetition_penalty\": 1,\n",
-    "        \"seed\": None,\n",
-    "        \"stop\": [],\n",
+    "        \"model\": \"llama-2-13b-chat-fp16\",\n",
+    "        \"max_tokens\": 128,\n",
+    "        \"presence_penalty\": 0,\n",
+    "        \"temperature\": 0.1,\n",
+    "        \"top_p\": 0.9,\n",
+    "        \"messages\": [\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": \"You are a helpful assistant. Keep your responses limited to one short paragraph if possible.\",\n",
+    "            },\n",
+    "        ],\n",
     "    },\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "'\\nLeonardo da Vinci was an Italian polymath and painter regarded by many as one of the greatest painters of all time. He is best known for his masterpieces including Mona Lisa, The Last Supper, and The Virgin of the Rocks. He was a draftsman, sculptor, architect, and one of the most important figures in the history of science. Da Vinci flew gliders, experimented with water turbines and windmills, and invented the catapult and a joystick-type human-powered aircraft control. He may have pioneered helicopters. As a scholar, he was interested in anatomy, geology, botany, engineering, mathematics, and astronomy.\\nOther painters and patrons claimed to be more talented, but Leonardo da Vinci was an incredibly productive artist, sculptor, engineer, anatomist, and scientist.'"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  Sure thing! Here's my response:\n",
+      "\n",
+      "Leonardo da Vinci was a true Renaissance man - an Italian polymath who excelled in various fields, including painting, sculpture, engineering, mathematics, anatomy, and geology. He is widely considered one of the greatest painters of all time, and his inventive and innovative works continue to inspire and influence artists and thinkers to this day. Some of his most famous works include the Mona Lisa, The Last Supper, and Vitruvian Man. \n"
+     ]
     }
    ],
    "source": [
@@ -103,7 +107,7 @@
     "\n",
     "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
     "\n",
-    "llm_chain.run(question)"
+    "print(llm_chain.run(question))"
    ]
   }
  ],
@@ -123,7 +127,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.7"
   },
   "vscode": {
    "interpreter": {
diff --git a/libs/community/langchain_community/embeddings/octoai_embeddings.py b/libs/community/langchain_community/embeddings/octoai_embeddings.py
index bcdd412e051..a93fa4e03ad 100644
--- a/libs/community/langchain_community/embeddings/octoai_embeddings.py
+++ b/libs/community/langchain_community/embeddings/octoai_embeddings.py
@@ -41,7 +41,7 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
             values, "octoai_api_token", "OCTOAI_API_TOKEN"
         )
         values["endpoint_url"] = get_from_dict_or_env(
-            values, "endpoint_url", "ENDPOINT_URL"
+            values, "endpoint_url", "https://text.octoai.run/v1/embeddings"
         )
         return values
 
@@ -59,19 +59,29 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
         """Compute embeddings using an OctoAI instruct model."""
         from octoai import client
 
+        embedding = []
         embeddings = []
         octoai_client = client.Client(token=self.octoai_api_token)
 
         for text in texts:
             parameter_payload = {
-                "sentence": str([text]),  # for item in text]),
-                "instruction": str([instruction]),  # for item in text]),
+                "sentence": str([text]),
+                "input": str([text]),
+                "instruction": str([instruction]),
+                "model": "thenlper/gte-large",
                 "parameters": self.model_kwargs or {},
             }
 
             try:
                 resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)
-                embedding = resp_json["embeddings"]
+                if "embeddings" in resp_json:
+                    embedding = resp_json["embeddings"]
+                elif "data" in resp_json:
+                    json_data = resp_json["data"]
+                    for item in json_data:
+                        if "embedding" in item:
+                            embedding.append(item["embedding"])
+
             except Exception as e:
                 raise ValueError(f"Error raised by the inference endpoint: {e}") from e
 
diff --git a/libs/community/langchain_community/llms/octoai_endpoint.py b/libs/community/langchain_community/llms/octoai_endpoint.py
index a6002b8ae06..e72ac113e9c 100644
--- a/libs/community/langchain_community/llms/octoai_endpoint.py
+++ b/libs/community/langchain_community/llms/octoai_endpoint.py
@@ -24,23 +24,9 @@ class OctoAIEndpoint(LLM):
             from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
             OctoAIEndpoint(
                 octoai_api_token="octoai-api-key",
-                endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",
+                endpoint_url="https://text.octoai.run/v1/chat/completions",
                 model_kwargs={
-                    "max_new_tokens": 200,
-                    "temperature": 0.75,
-                    "top_p": 0.95,
-                    "repetition_penalty": 1,
-                    "seed": None,
-                    "stop": [],
-                },
-            )
-
-            from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
-            OctoAIEndpoint(
-                octoai_api_token="octoai-api-key",
-                endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",
-                model_kwargs={
-                    "model": "llama-2-7b-chat",
+                    "model": "llama-2-13b-chat-fp16",
                     "messages": [
                         {
                             "role": "system",
@@ -49,7 +35,10 @@ class OctoAIEndpoint(LLM):
                         }
                     ],
                     "stream": False,
-                    "max_tokens": 256
+                    "max_tokens": 256,
+                    "presence_penalty": 0,
+                    "temperature": 0.1,
+                    "top_p": 0.9
                 }
             )
 
@@ -119,19 +108,45 @@ class OctoAIEndpoint(LLM):
         _model_kwargs = self.model_kwargs or {}
 
         try:
-            # Initialize the OctoAI client
             from octoai import client
 
+            # Initialize the OctoAI client
             octoai_client = client.Client(token=self.octoai_api_token)
 
             if "model" in _model_kwargs:
                 parameter_payload = _model_kwargs
+
+                sys_msg = None
+                if "messages" in parameter_payload:
+                    msgs = parameter_payload.get("messages", [])
+                    for msg in msgs:
+                        if msg.get("role") == "system":
+                            sys_msg = msg.get("content")
+
+                # Reset the messages list; only the system message (if any) is kept
+                parameter_payload["messages"] = []
+
+                # Re-add the system message, if one was supplied
+                if sys_msg:
+                    parameter_payload["messages"].append(
+                        {"role": "system", "content": sys_msg}
+                    )
+
+                # Append user message
                 parameter_payload["messages"].append(
                     {"role": "user", "content": prompt}
                 )
+
                 # Send the request using the OctoAI client
-                output = octoai_client.infer(self.endpoint_url, parameter_payload)
-                text = output.get("choices")[0].get("message").get("content")
+                try:
+                    output = octoai_client.infer(self.endpoint_url, parameter_payload)
+                    if output and "choices" in output and len(output["choices"]) > 0:
+                        text = output["choices"][0].get("message", {}).get("content")
+                    else:
+                        text = "Error: Invalid response format or empty choices."
+                except Exception as e:
+                    text = f"Error during API call: {str(e)}"
+
             else:
                 # Prepare the payload JSON
                 parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}