community[patch]: Fix Ollama for LLaMA3 (#20624)

We see verbose generations w/ LLaMA3 and Ollama - 

https://smith.langchain.com/public/88c4cd21-3d57-4229-96fe-53443398ca99/r

--- 

The fix: when `stop` was not supplied, it was previously set to an empty list, which left the stream with no conditions under which to stop and could lead to excessive or unintended output. `stop` is now left as `None` instead of being coerced to `[]`.
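
For context, here is a minimal sketch of the `stop` resolution that the two hunks in `_OllamaCommon` below change. The helper name and signature are hypothetical; in the actual code this logic sits inline in the streaming methods:

```python
from typing import List, Optional


def resolve_stop(
    stop: Optional[List[str]], instance_stop: Optional[List[str]]
) -> Optional[List[str]]:
    """Hypothetical helper mirroring the inline logic in _OllamaCommon.

    `instance_stop` corresponds to `self.stop` on the class.
    """
    if stop is not None and instance_stop is not None:
        raise ValueError("`stop` found in both the input and default params.")
    if instance_stop is not None:
        return instance_stop
    # Before this change: `return stop if stop is not None else []`.
    # An empty list left the stream with no conditions under which to stop.
    # After this change: leave `stop` as None so nothing is overridden.
    return stop
```

The unit-test expectations change from `"stop": []` to `"stop": None` accordingly.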

Test LLaMA2 - 

https://smith.langchain.com/public/57dfc64a-591b-46fa-a1cd-8783acaefea2/r

Test LLaMA3 - 

https://smith.langchain.com/public/76ff5f47-ac89-4772-a7d2-5caa907d3fd6/r

https://smith.langchain.com/public/a31d2fad-9094-4c93-949a-964b27630ccb/r

Test Mistral -

https://smith.langchain.com/public/a4fe7114-c308-4317-b9fd-6c86d31f1c5b/r
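
A quick local check, mirroring the updated notebook cells below (this sketch assumes the Ollama server is running locally and `ollama pull llama3` has already been run):

```python
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Same chain as in the docs notebook: prompt -> ChatOllama -> string output.
llm = ChatOllama(model="llama3")
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
chain = prompt | llm | StrOutputParser()

# With the fix, generation terminates normally instead of producing the
# verbose/runaway output shown in the traces above.
print(chain.invoke({"topic": "Space travel"}))
```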

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Lance Martin 2024-04-18 17:20:32 -07:00 committed by GitHub
parent 726234eee5
commit d5c22b80a5
4 changed files with 36 additions and 94 deletions

View File

@@ -30,7 +30,7 @@
"* [Download](https://ollama.ai/download) and install Ollama onto the available supported platforms (including Windows Subsystem for Linux)\n",
"* Fetch available LLM model via `ollama pull <name-of-model>`\n",
" * View a list of available models via the [model library](https://ollama.ai/library)\n",
" * e.g., for `Llama-7b`: `ollama pull llama2`\n",
" * e.g., `ollama pull llama3`\n",
"* This will download the default tagged version of the model. Typically, the default points to the latest, smallest sized-parameter model.\n",
"\n",
"> On Mac, the models will be download to `~/.ollama/models`\n",
@@ -46,7 +46,7 @@
"\n",
"You can see a full list of supported parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html).\n",
"\n",
"If you are using a LLaMA `chat` model (e.g., `ollama pull llama2:7b-chat`) then you can use the `ChatOllama` interface.\n",
"If you are using a LLaMA `chat` model (e.g., `ollama pull llama3`) then you can use the `ChatOllama` interface.\n",
"\n",
"This includes [special tokens](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) for system message and user input.\n",
"\n",
@@ -65,7 +65,7 @@
"\n",
"```bash\n",
"curl http://localhost:11434/api/generate -d '{\n",
" \"model\": \"llama2\",\n",
" \"model\": \"llama3\",\n",
" \"prompt\":\"Why is the sky blue?\"\n",
"}'\n",
"```\n",
@@ -86,11 +86,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
" Sure, here's a fun space-themed joke for you:\n",
"Why did the astronaut break up with his girlfriend?\n",
"\n",
"Why don't astronauts like broccoli? \n",
"Because it has too many \"crisps\" in it!\n",
"\n"
"Because he needed space!\n"
]
}
],
@@ -102,7 +100,7 @@
"\n",
"# supports many more optional parameters. Hover on your `ChatOllama(...)`\n",
"# class to view the latest available supported parameters\n",
"llm = ChatOllama(model=\"llama2\")\n",
"llm = ChatOllama(model=\"llama3\")\n",
"prompt = ChatPromptTemplate.from_template(\"Tell me a short joke about {topic}\")\n",
"\n",
"# using LangChain Expressive Language chain syntax\n",
@@ -125,21 +123,14 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure\n",
",\n",
" here\n",
"'s\n",
" a\n",
" joke\n",
":\n",
" Why\n",
"Why\n",
" did\n",
" the\n",
" astronaut\n",
@@ -148,17 +139,18 @@
" with\n",
" his\n",
" girlfriend\n",
" before\n",
" going\n",
" to\n",
" Mars\n",
"?\n",
" Because\n",
"\n",
"\n",
"Because\n",
" he\n",
" needed\n",
" more\n",
" space\n",
" to\n",
" explore\n",
".\n",
"\n",
"\n",
"!\n",
"\n"
]
}
@@ -179,51 +171,9 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure\n",
",\n",
" here\n",
"'s\n",
" a\n",
" little\n",
" one\n",
":\n",
" Why\n",
" did\n",
" the\n",
" rocket\n",
" scientist\n",
" break\n",
" up\n",
" with\n",
" her\n",
" partner\n",
"?\n",
" Because\n",
" he\n",
" couldn\n",
"'t\n",
" handle\n",
" all\n",
" her\n",
" \"\n",
"space\n",
"y\n",
"\"\n",
" jokes\n",
".\n",
"\n",
"\n",
"\n"
]
}
],
"outputs": [],
"source": [
"topic = {\"topic\": \"Space travel\"}\n",
"\n",
@@ -255,13 +205,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatOllama\n",
"\n",
"llm = ChatOllama(model=\"llama2\", format=\"json\", temperature=0)"
"llm = ChatOllama(model=\"llama3\", format=\"json\", temperature=0)"
]
},
{
@@ -273,7 +223,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"content='{\\n\"morning\": {\\n\"color\": \"light blue\"\\n},\\n\"noon\": {\\n\"color\": \"blue\"\\n},\\n\"afternoon\": {\\n\"color\": \"grayish-blue\"\\n},\\n\"evening\": {\\n\"color\": \"pinkish-orange\"\\n}\\n}'\n"
"content='{ \"morning\": \"blue\", \"noon\": \"clear blue\", \"afternoon\": \"hazy yellow\", \"evening\": \"orange-red\" }\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n ' id='run-e893700f-e2d0-4df8-ad86-17525dcee318-0'\n"
]
}
],
@@ -292,7 +242,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -300,13 +250,9 @@
"output_type": "stream",
"text": [
"\n",
"{\n",
"\"name\": \"John\",\n",
"\"age\": 35,\n",
"\"interests\": [\n",
"\"pizza\"\n",
"]\n",
"}\n"
"Name: John\n",
"Age: 35\n",
"Likes: Pizza\n"
]
}
],
@@ -516,7 +462,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.8"
}
},
"nbformat": 4,

View File

@@ -21,7 +21,7 @@
"* [Download](https://ollama.ai/download) and install Ollama onto the available supported platforms (including Windows Subsystem for Linux)\n",
"* Fetch available LLM model via `ollama pull <name-of-model>`\n",
" * View a list of available models via the [model library](https://ollama.ai/library)\n",
" * e.g., for `Llama-7b`: `ollama pull llama2`\n",
" * e.g., `ollama pull llama3`\n",
"* This will download the default tagged version of the model. Typically, the default points to the latest, smallest sized-parameter model.\n",
"\n",
"> On Mac, the models will be download to `~/.ollama/models`\n",
@@ -37,7 +37,7 @@
"\n",
"You can see a full list of supported parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html).\n",
"\n",
"If you are using a LLaMA `chat` model (e.g., `ollama pull llama2:7b-chat`) then you can use the `ChatOllama` interface.\n",
"If you are using a LLaMA `chat` model (e.g., `ollama pull llama3`) then you can use the `ChatOllama` interface.\n",
"\n",
"This includes [special tokens](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) for system message and user input.\n",
"\n",
@@ -56,7 +56,7 @@
"\n",
"```bash\n",
"curl http://localhost:11434/api/generate -d '{\n",
" \"model\": \"llama2\",\n",
" \"model\": \"llama3\",\n",
" \"prompt\":\"Why is the sky blue?\"\n",
"}'\n",
"```\n",
@@ -70,16 +70,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"Sure! Here's a quick one:\\n\\nWhy don't scientists trust atoms?\\nBecause they make up everything!\\n\\nI hope that brought a smile to your face!\""
"\"Here's one:\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything!\\n\\nHope that made you smile! Do you want to hear another one?\""
]
},
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@@ -87,7 +87,7 @@
"source": [
"from langchain_community.llms import Ollama\n",
"\n",
"llm = Ollama(model=\"llama2\")\n",
"llm = Ollama(model=\"llama3\")\n",
"\n",
"llm.invoke(\"Tell me a joke\")"
]
@@ -298,7 +298,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.8"
}
},
"nbformat": 4,

View File

@@ -203,8 +203,6 @@ class _OllamaCommon(BaseLanguageModel):
raise ValueError("`stop` found in both the input and default params.")
elif self.stop is not None:
stop = self.stop
elif stop is None:
stop = []
params = self._default_params
@@ -267,8 +265,6 @@ class _OllamaCommon(BaseLanguageModel):
raise ValueError("`stop` found in both the input and default params.")
elif self.stop is not None:
stop = self.stop
elif stop is None:
stop = []
params = self._default_params

View File

@@ -91,7 +91,7 @@ def test_handle_kwargs_top_level_parameters(monkeypatch: MonkeyPatch) -> None:
"num_predict": None,
"repeat_last_n": None,
"repeat_penalty": None,
"stop": [],
"stop": None,
"temperature": None,
"tfs_z": None,
"top_k": None,
@@ -138,7 +138,7 @@ def test_handle_kwargs_with_unknown_param(monkeypatch: MonkeyPatch) -> None:
"num_predict": None,
"repeat_last_n": None,
"repeat_penalty": None,
"stop": [],
"stop": None,
"temperature": 0.8,
"tfs_z": None,
"top_k": None,