diff --git a/docs/docs/how_to/local_llms.ipynb b/docs/docs/how_to/local_llms.ipynb index 1d3f0b821f9..3a82e82b849 100644 --- a/docs/docs/how_to/local_llms.ipynb +++ b/docs/docs/how_to/local_llms.ipynb @@ -46,7 +46,7 @@ "\n", "1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n", "2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n", - "3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM\n", + "3. [`ollama`](https://github.com/ollama/ollama): Bundles model weights and environment into an app that runs on device and serves the LLM\n", "4. [`llamafile`](https://github.com/Mozilla-Ocho/llamafile): Bundles model weights and everything needed to run the model in a single file, allowing you to run the LLM locally from this file without any additional installation steps\n", "\n", "In general, these frameworks will do a few things:\n", @@ -74,12 +74,12 @@ "\n", "## Quickstart\n", "\n", - "[`Ollama`](https://ollama.ai/) is one way to easily run inference on macOS.\n", + "[Ollama](https://ollama.com/) is one way to easily run inference on macOS.\n", " \n", - "The instructions [here](https://github.com/jmorganca/ollama?tab=readme-ov-file#ollama) provide details, which we summarize:\n", + "The instructions [here](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) provide details, which we summarize:\n", " \n", "* [Download and run](https://ollama.ai/download) the app\n", - "* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama3.1:8b`\n", + "* From command line, fetch a model from this [list of options](https://ollama.com/search): e.g., `ollama pull gpt-oss:20b`\n", "* When the app is running, all models are automatically served on `localhost:11434`\n" ] }, @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "86178adb", "metadata": {}, "outputs": [ @@ -111,11 +111,11 @@ } ], "source": [ - "from langchain_ollama import OllamaLLM\n", + "from langchain_ollama import ChatOllama\n", "\n", - "llm = OllamaLLM(model=\"llama3.1:8b\")\n", + "llm = ChatOllama(model=\"gpt-oss:20b\", validate_model_on_init=True)\n", "\n", - "llm.invoke(\"The first man on the moon was ...\")" + "llm.invoke(\"The first man on the moon was ...\").content" ] }, { @@ -200,7 +200,7 @@ "\n", "### Running Apple silicon GPU\n", "\n", - "`Ollama` and [`llamafile`](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#gpu-support) will automatically utilize the GPU on Apple devices.\n", + "`ollama` and [`llamafile`](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#gpu-support) will automatically utilize the GPU on Apple devices.\n", " \n", "Other frameworks require the user to set up the environment to utilize the Apple GPU.\n", "\n", @@ -212,15 +212,15 @@ "\n", "In particular, ensure that conda is using the correct virtual environment that you created (`miniforge3`).\n", "\n", - "E.g., for me:\n", + "e.g., for me:\n", "\n", - "```\n", + "```shell\n", "conda activate /Users/rlm/miniforge3/envs/llama\n", "```\n", "\n", "With the above confirmed, then:\n", "\n", - "```\n", + "```shell\n", "CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir\n", "```" ] @@ -236,20 +236,16 @@ "\n", "1. 
[`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp). You can also download models in [`llamafile` format](https://huggingface.co/models?other=llamafile) from HuggingFace.\n",
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
- "3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
+ "3. [`ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
"\n",
"### Ollama\n",
"\n",
- "With [Ollama](https://github.com/jmorganca/ollama), fetch a model via `ollama pull <model>:<tag>`:\n",
- "\n",
- "* E.g., for Llama 2 7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
- "* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama?tab=readme-ov-file#model-library), e.g., `ollama pull llama2:13b`\n",
- "* See the full set of parameters on the [API reference page](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.ollama.Ollama.html)"
+ "With [Ollama](https://github.com/ollama/ollama), fetch a model via `ollama pull <model>:<tag>`."
]
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": null,
"id": "8ecd2f78",
"metadata": {},
"outputs": [
@@ -265,7 +261,7 @@
}
],
"source": [
- "llm = OllamaLLM(model=\"llama2:13b\")\n",
+ "llm = ChatOllama(model=\"gpt-oss:20b\")\n",
"llm.invoke(\"The first man on the moon was ... think step by step\")"
]
},
@@ -694,7 +690,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "langchain",
"language": "python",
"name": "python3"
},
@@ -708,7 +704,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.5"
+ "version": "3.12.11"
}
},
"nbformat": 4,
diff --git a/docs/docs/integrations/chat/ollama.ipynb b/docs/docs/integrations/chat/ollama.ipynb
index be8b6284c78..c9441b46c51 100644
--- a/docs/docs/integrations/chat/ollama.ipynb
+++ b/docs/docs/integrations/chat/ollama.ipynb
@@ -17,9 +17,9 @@
"source": [
"# ChatOllama\n",
"\n",
- "[Ollama](https://ollama.ai/) allows you to run open-source large language models, such as Llama 2, locally.\n",
+ "[Ollama](https://ollama.com/) allows you to run open-source large language models, such as `gpt-oss`, locally.\n",
"\n",
- "Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile.\n",
+ "`ollama` bundles model weights, configuration, and data into a single package, defined by a Modelfile.\n",
"\n",
"It optimizes setup and configuration details, including GPU usage.\n",
"\n",
@@ -28,14 +28,14 @@
"## Overview\n",
"### Integration details\n",
"\n",
- "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/ollama) | Package downloads | Package latest |\n",
+ "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/ollama) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
- "| [ChatOllama](https://python.langchain.com/v0.2/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html) | 
[langchain-ollama](https://python.langchain.com/v0.2/api_reference/ollama/index.html) | ✅ | ❌ | ✅ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ollama?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ollama?style=flat-square&label=%20) |\n",
+ "| [ChatOllama](https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#chatollama) | [langchain-ollama](https://python.langchain.com/api_reference/ollama/index.html) | ✅ | ❌ | ✅ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-ollama?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-ollama?style=flat-square&label=%20) |\n",
"\n",
"### Model features\n",
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
"| :---: |:----------------------------------------------------:| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
- "| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ |\n",
+ "| ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ |\n",
"\n",
"## Setup\n",
"\n",
@@ -45,17 +45,17 @@
" * macOS users can install via Homebrew with `brew install ollama` and start with `brew services start ollama`\n",
"* Fetch available LLM model via `ollama pull <model>`\n",
" * View a list of available models via the [model library](https://ollama.ai/library)\n",
- " * e.g., `ollama pull llama3`\n",
+ " * e.g., `ollama pull gpt-oss:20b`\n",
"* This will download the default tagged version of the model. Typically, the default points to the latest, smallest sized-parameter model.\n",
"\n",
"> On Mac, the models will be download to `~/.ollama/models`\n",
">\n",
"> On Linux (or WSL), the models will be stored at `/usr/share/ollama/.ollama/models`\n",
"\n",
- "* Specify the exact version of the model of interest as such `ollama pull vicuna:13b-v1.5-16k-q4_0` (View the [various tags for the `Vicuna`](https://ollama.ai/library/vicuna/tags) model in this instance)\n",
+ "* Specify the exact version of the model of interest as such `ollama pull gpt-oss:20b` (View the [various tags for the `gpt-oss`](https://ollama.com/library/gpt-oss/tags) model in this instance)\n",
"* To view all pulled models, use `ollama list`\n",
"* To chat directly with a model from the command line, use `ollama run <model>`\n",
- "* View the [Ollama documentation](https://github.com/ollama/ollama/tree/main/docs) for more commands. You can run `ollama help` in the terminal to see available commands.\n"
+ "* View the [Ollama documentation](https://github.com/ollama/ollama/blob/main/docs/README.md) for more commands. You can run `ollama help` in the terminal to see available commands.\n"
]
},
{
@@ -102,7 +102,11 @@
"id": "b18bd692076f7cf7",
"metadata": {},
"source": [
- "Make sure you're using the latest Ollama version for structured outputs. 
Update by running:"
+ ":::warning\n",
+ "Make sure you're using the latest Ollama version!\n",
+ ":::\n",
+ "\n",
+ "Update by running:"
]
},
{
@@ -257,10 +261,10 @@
"source": [
"## Tool calling\n",
"\n",
- "We can use [tool calling](/docs/concepts/tool_calling/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/search?&c=tools) such as `llama3.1`:\n",
+ "We can use [tool calling](/docs/concepts/tool_calling/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/search?&c=tools) such as `gpt-oss`:\n",
"\n",
"```\n",
- "ollama pull llama3.1\n",
+ "ollama pull gpt-oss:20b\n",
"```\n",
"\n",
"Details on creating custom tools are available in [this guide](/docs/how_to/custom_tools/). Below, we demonstrate how to create a tool using the `@tool` decorator on a normal python function."
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "f767015f",
"metadata": {},
"outputs": [
@@ -300,7 +304,8 @@
"\n",
"\n",
"llm = ChatOllama(\n",
- "    model=\"llama3.1\",\n",
+ "    model=\"gpt-oss:20b\",\n",
+ "    validate_model_on_init=True,\n",
"    temperature=0,\n",
").bind_tools([validate_user])\n",
"\n",
@@ -321,9 +326,7 @@
"source": [
"## Multi-modal\n",
"\n",
- "Ollama has support for multi-modal LLMs, such as [bakllava](https://ollama.com/library/bakllava) and [llava](https://ollama.com/library/llava).\n",
- "\n",
- " ollama pull bakllava\n",
"\n",
+ "Ollama has limited support for multi-modal LLMs, such as [gemma3](https://ollama.com/library/gemma3).\n",
"\n",
"Be sure to update Ollama so that you have the most recent version to support multi-modal."
]
},
{
@@ -518,7 +521,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "langchain",
"language": "python",
"name": "python3"
},
@@ -532,7 +535,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.4"
+ "version": "3.12.11"
}
},
"nbformat": 4,
diff --git a/docs/docs/integrations/providers/ollama.mdx b/docs/docs/integrations/providers/ollama.mdx
index 3d602eb6909..dce6a08c273 100644
--- a/docs/docs/integrations/providers/ollama.mdx
+++ b/docs/docs/integrations/providers/ollama.mdx
@@ -1,14 +1,14 @@
# Ollama

>[Ollama](https://ollama.com/) allows you to run open-source large language models,
-> such as [Llama3.1](https://ai.meta.com/blog/meta-llama-3-1/), locally.
+> such as [gpt-oss](https://ollama.com/library/gpt-oss), locally.
>
>`Ollama` bundles model weights, configuration, and data into a single package, defined by a Modelfile.
>It optimizes setup and configuration details, including GPU usage.
>For a complete list of supported models and model variants, see the [Ollama model library](https://ollama.ai/library).

-See [this guide](/docs/how_to/local_llms) for more details
-on how to use `Ollama` with LangChain.
+See [this guide](/docs/how_to/local_llms#ollama) for more details
+on how to use `ollama` with LangChain.

## Installation and Setup
### Ollama installation
@@ -26,7 +26,7 @@ ollama serve
After starting ollama, run `ollama pull <model>` to download a model from the [Ollama model library](https://ollama.ai/library):

```bash
-ollama pull llama3.1
+ollama pull gpt-oss:20b
```

- This will download the default tagged version of the model. Typically, the default points to the latest, smallest sized-parameter model. 
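For anyone trying the updated docs locally, here is a minimal sketch of the quickstart path these pages now describe (assumptions: `langchain-ollama` is installed, a local Ollama server is running, and `gpt-oss:20b` has already been pulled as shown above):

```python
from langchain_ollama import ChatOllama

# Assumes `pip install -U langchain-ollama`, a running Ollama server on
# localhost:11434, and a completed `ollama pull gpt-oss:20b`.
llm = ChatOllama(model="gpt-oss:20b", validate_model_on_init=True)

response = llm.invoke("The first man on the moon was ...")
print(response.content)
```

`validate_model_on_init=True` makes the constructor fail fast when the model has not been pulled yet, which appears to be why the updated snippets enable it.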
diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py index aa350643101..80c2ef3467a 100644 --- a/libs/partners/ollama/langchain_ollama/chat_models.py +++ b/libs/partners/ollama/langchain_ollama/chat_models.py @@ -229,7 +229,7 @@ class ChatOllama(BaseChatModel): .. code-block:: bash - ollama pull mistral:v0.3 + ollama pull gpt-oss:20b pip install -U langchain-ollama Key init args — completion params: @@ -262,7 +262,8 @@ class ChatOllama(BaseChatModel): from langchain_ollama import ChatOllama llm = ChatOllama( - model = "llama3", + model = "gpt-oss:20b", + validate_model_on_init = True, temperature = 0.8, num_predict = 256, # other params ... @@ -284,10 +285,7 @@ class ChatOllama(BaseChatModel): Stream: .. code-block:: python - messages = [ - ("human", "Return the words Hello World!"), - ] - for chunk in llm.stream(messages): + for chunk in llm.stream("Return the words Hello World!"): print(chunk.text(), end="") @@ -314,10 +312,7 @@ class ChatOllama(BaseChatModel): Async: .. code-block:: python - messages = [ - ("human", "Hello how are you!"), - ] - await llm.ainvoke(messages) + await llm.ainvoke("Hello how are you!") .. code-block:: python @@ -325,10 +320,7 @@ class ChatOllama(BaseChatModel): .. code-block:: python - messages = [ - ("human", "Say hello world!"), - ] - async for chunk in llm.astream(messages): + async for chunk in llm.astream("Say hello world!"): print(chunk.content) .. code-block:: python @@ -356,10 +348,7 @@ class ChatOllama(BaseChatModel): json_llm = ChatOllama(format="json") - messages = [ - ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."), - ] - llm.invoke(messages).content + llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content .. code-block:: python @@ -406,17 +395,16 @@ class ChatOllama(BaseChatModel): llm = ChatOllama( model = "deepseek-r1:8b", + validate_model_on_init = True, reasoning= True, ) - user_message = HumanMessage(content="how many r in the word strawberry?") - messages: List[Any] = [user_message] - llm.invoke(messages) + llm.invoke("how many r in the word strawberry?") # or, on an invocation basis: - llm.invoke(messages, reasoning=True) - # or llm.stream(messages, reasoning=True) + llm.invoke("how many r in the word strawberry?", reasoning=True) + # or llm.stream("how many r in the word strawberry?", reasoning=True) # If not provided, the invocation will default to the ChatOllama reasoning # param provided (None by default).
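To close out the docstring changes above, a self-contained sketch of the reasoning-mode usage they document (assumptions: `deepseek-r1:8b` has been pulled, and the location of the separated thinking trace, `additional_kwargs["reasoning_content"]`, is my assumption rather than something stated in the excerpt above):

```python
from langchain_ollama import ChatOllama

# Assumes `ollama pull deepseek-r1:8b` has completed and the Ollama
# server is reachable on its default port (11434).
llm = ChatOllama(
    model="deepseek-r1:8b",
    validate_model_on_init=True,
    reasoning=True,  # keep the model's thinking separate from the final answer
)

msg = llm.invoke("how many r in the word strawberry?")
print(msg.content)  # final answer only
# Assumption: the thinking trace lands in additional_kwargs["reasoning_content"].
print(msg.additional_kwargs.get("reasoning_content"))
```

Per the docstring, `reasoning` can also be passed per invocation (e.g. `llm.invoke(..., reasoning=True)`) and, when left unset, falls back to the value given at construction time.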