mirror of https://github.com/hwchase17/langchain.git
Merge branch 'openaiembeddings-update' of https://github.com/ArmaanjeetSandhu/langchain into openaiembeddings-update
commit 19d75f4ed6

docs/docs/integrations/chat/runpod.ipynb (new file, 276 lines)
@@ -0,0 +1,276 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RunPod Chat Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get started with RunPod chat models.\n",
    "\n",
    "## Overview\n",
    "\n",
    "This guide covers how to use the LangChain `ChatRunPod` class to interact with chat models hosted on [RunPod Serverless](https://www.runpod.io/serverless-gpu)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "1. **Install the package:**\n",
    "   ```bash\n",
    "   pip install -qU langchain-runpod\n",
    "   ```\n",
    "2. **Deploy a Chat Model Endpoint:** Follow the setup steps in the [RunPod Provider Guide](/docs/integrations/providers/runpod#setup) to deploy a compatible chat model endpoint on RunPod Serverless and get its Endpoint ID.\n",
    "3. **Set Environment Variables:** Make sure `RUNPOD_API_KEY` and `RUNPOD_ENDPOINT_ID` (or a specific `RUNPOD_CHAT_ENDPOINT_ID`) are set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "# Make sure environment variables are set (or pass them directly to ChatRunPod)\n",
    "if \"RUNPOD_API_KEY\" not in os.environ:\n",
    "    os.environ[\"RUNPOD_API_KEY\"] = getpass.getpass(\"Enter your RunPod API Key: \")\n",
    "\n",
    "if \"RUNPOD_ENDPOINT_ID\" not in os.environ:\n",
    "    os.environ[\"RUNPOD_ENDPOINT_ID\"] = input(\n",
    "        \"Enter your RunPod Endpoint ID (used if RUNPOD_CHAT_ENDPOINT_ID is not set): \"\n",
    "    )\n",
    "\n",
    "# Optionally use a different endpoint ID specifically for chat models\n",
    "# if \"RUNPOD_CHAT_ENDPOINT_ID\" not in os.environ:\n",
    "#     os.environ[\"RUNPOD_CHAT_ENDPOINT_ID\"] = input(\"Enter your RunPod Chat Endpoint ID (Optional): \")\n",
    "\n",
    "chat_endpoint_id = os.environ.get(\n",
    "    \"RUNPOD_CHAT_ENDPOINT_ID\", os.environ.get(\"RUNPOD_ENDPOINT_ID\")\n",
    ")\n",
    "if not chat_endpoint_id:\n",
    "    raise ValueError(\n",
    "        \"No RunPod Endpoint ID found. Please set RUNPOD_ENDPOINT_ID or RUNPOD_CHAT_ENDPOINT_ID.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Instantiation\n",
    "\n",
    "Initialize the `ChatRunPod` class. You can pass model-specific parameters via `model_kwargs` and configure polling behavior."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_runpod import ChatRunPod\n",
    "\n",
    "chat = ChatRunPod(\n",
    "    runpod_endpoint_id=chat_endpoint_id,  # Specify the correct endpoint ID\n",
    "    model_kwargs={\n",
    "        \"max_new_tokens\": 512,\n",
    "        \"temperature\": 0.7,\n",
    "        \"top_p\": 0.9,\n",
    "        # Add other parameters supported by your endpoint handler\n",
    "    },\n",
    "    # Optional: Adjust polling\n",
    "    # poll_interval=0.2,\n",
    "    # max_polling_attempts=150\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Invocation\n",
    "\n",
    "Use the standard LangChain `.invoke()` and `.ainvoke()` methods to call the model. Streaming is also supported via `.stream()` and `.astream()` (simulated by polling the RunPod `/stream` endpoint)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_core.messages import HumanMessage, SystemMessage\n",
    "\n",
    "messages = [\n",
    "    SystemMessage(content=\"You are a helpful AI assistant.\"),\n",
    "    HumanMessage(content=\"What is the RunPod Serverless API flow?\"),\n",
    "]\n",
    "\n",
    "# Invoke (Sync)\n",
    "try:\n",
    "    response = chat.invoke(messages)\n",
    "    print(\"--- Sync Invoke Response ---\")\n",
    "    print(response.content)\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"Error invoking Chat Model: {e}. Ensure endpoint ID/API key are correct and endpoint is active/compatible.\"\n",
    "    )\n",
    "\n",
    "# Stream (Sync, simulated via polling /stream)\n",
    "print(\"\\n--- Sync Stream Response ---\")\n",
    "try:\n",
    "    for chunk in chat.stream(messages):\n",
    "        print(chunk.content, end=\"\", flush=True)\n",
    "    print()  # Newline\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"\\nError streaming Chat Model: {e}. Ensure endpoint handler supports streaming output format.\"\n",
    "    )\n",
    "\n",
    "# Async usage (top-level await works in Jupyter)\n",
    "\n",
    "# AInvoke (Async)\n",
    "try:\n",
    "    async_response = await chat.ainvoke(messages)\n",
    "    print(\"--- Async Invoke Response ---\")\n",
    "    print(async_response.content)\n",
    "except Exception as e:\n",
    "    print(f\"Error invoking Chat Model asynchronously: {e}.\")\n",
    "\n",
    "# AStream (Async)\n",
    "print(\"\\n--- Async Stream Response ---\")\n",
    "try:\n",
    "    async for chunk in chat.astream(messages):\n",
    "        print(chunk.content, end=\"\", flush=True)\n",
    "    print()  # Newline\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"\\nError streaming Chat Model asynchronously: {e}. Ensure endpoint handler supports streaming output format.\\n\"\n",
    "    )"
   ]
  },
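  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`ChatRunPod` implements the standard LangChain `Runnable` interface, so batching also works out of the box via `.batch()` / `.abatch()`. A minimal sketch (each input becomes a separate endpoint call, so expect one polling round-trip per prompt):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Batch (Sync) - each inner message list is one model call\n",
    "try:\n",
    "    batch_responses = chat.batch(\n",
    "        [\n",
    "            [HumanMessage(content=\"What GPUs does RunPod offer?\")],\n",
    "            [HumanMessage(content=\"What is a serverless cold start?\")],\n",
    "        ]\n",
    "    )\n",
    "    print(\"--- Batch Responses ---\")\n",
    "    for r in batch_responses:\n",
    "        print(r.content)\n",
    "except Exception as e:\n",
    "    print(f\"Error batching Chat Model: {e}\")"
   ]
  },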
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chaining\n",
    "\n",
    "The chat model integrates seamlessly with LangChain Expression Language (LCEL) chains."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", \"You are a helpful assistant.\"),\n",
    "        (\"human\", \"{input}\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "parser = StrOutputParser()\n",
    "\n",
    "chain = prompt | chat | parser\n",
    "\n",
    "try:\n",
    "    chain_response = chain.invoke(\n",
    "        {\"input\": \"Explain the concept of serverless computing in simple terms.\"}\n",
    "    )\n",
    "    print(\"--- Chain Response ---\")\n",
    "    print(chain_response)\n",
    "except Exception as e:\n",
    "    print(f\"Error running chain: {e}\")\n",
    "\n",
    "\n",
    "# Async chain\n",
    "try:\n",
    "    async_chain_response = await chain.ainvoke(\n",
    "        {\"input\": \"What are the benefits of using RunPod for AI/ML workloads?\"}\n",
    "    )\n",
    "    print(\"--- Async Chain Response ---\")\n",
    "    print(async_chain_response)\n",
    "except Exception as e:\n",
    "    print(f\"Error running async chain: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Features (Endpoint Dependent)\n",
    "\n",
    "The availability of advanced features depends **heavily** on the specific implementation of your RunPod endpoint handler. The `ChatRunPod` integration provides the basic framework, but the handler must support the underlying functionality.\n",
    "\n",
    "| Feature | Integration Support | Endpoint Dependent? | Notes |\n",
    "| :--- | :---: | :---: | :--- |\n",
    "| [Tool calling](/docs/how_to/tool_calling) | ❌ | ✅ | Requires handler to process tool definitions and return tool calls (e.g., OpenAI format). Integration needs parsing logic. |\n",
    "| [Structured output](/docs/how_to/structured_output) | ❌ | ✅ | Requires handler support for forcing structured output (JSON mode, function calling). Integration needs parsing logic. |\n",
    "| JSON mode | ❌ | ✅ | Requires handler to accept a `json_mode` parameter (or similar) and guarantee JSON output. |\n",
    "| [Image input](/docs/how_to/multimodal_inputs) | ❌ | ✅ | Requires multimodal handler accepting image data (e.g., base64). Integration does not support multimodal messages. |\n",
    "| Audio input | ❌ | ✅ | Requires handler accepting audio data. Integration does not support audio messages. |\n",
    "| Video input | ❌ | ✅ | Requires handler accepting video data. Integration does not support video messages. |\n",
    "| [Token-level streaming](/docs/how_to/chat_streaming) | ✅ (Simulated) | ✅ | Polls `/stream`. Requires handler to populate `stream` list in status response with token chunks (e.g., `[{\"output\": \"token\"}]`). True low-latency streaming not built-in. |\n",
    "| Native async | ✅ | ✅ | Core `ainvoke`/`astream` implemented. Relies on endpoint handler performance. |\n",
    "| [Token usage](/docs/how_to/chat_token_usage_tracking) | ❌ | ✅ | Requires handler to return `prompt_tokens`, `completion_tokens` in the final response. Integration currently does not parse this. |\n",
    "| [Logprobs](/docs/how_to/logprobs) | ❌ | ✅ | Requires handler to return log probabilities. Integration currently does not parse this. |"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Key Takeaway:** Standard chat invocation and simulated streaming work if the endpoint follows basic RunPod API conventions. Advanced features require specific handler implementations and potentially extending or customizing this integration package."
   ]
  },
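  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, although native structured output is not supported, you can often approximate it by prompting the model for JSON and parsing the raw text with `JsonOutputParser`. This is a sketch only — without handler-side JSON mode there is no guarantee the model complies:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "\n",
    "# Prompt-based workaround: ask for JSON explicitly, then parse the text.\n",
    "json_prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", \"Reply ONLY with a valid JSON object of the form {{\\\"city\\\": str, \\\"fact\\\": str}}.\"),\n",
    "        (\"human\", \"{input}\"),\n",
    "    ]\n",
    ")\n",
    "json_chain = json_prompt | chat | JsonOutputParser()\n",
    "\n",
    "try:\n",
    "    print(json_chain.invoke({\"input\": \"Tell me a fact about Paris.\"}))\n",
    "except Exception as e:\n",
    "    print(f\"Error getting structured output: {e}\")"
   ]
  },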
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of the `ChatRunPod` class, parameters, and methods, refer to the source code or the generated API reference (if available).\n",
    "\n",
    "Link to source code: [https://github.com/runpod/langchain-runpod/blob/main/langchain_runpod/chat_models.py](https://github.com/runpod/langchain-runpod/blob/main/langchain_runpod/chat_models.py)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
docs/docs/integrations/llms/runpod.ipynb (new file, 266 lines)
@@ -0,0 +1,266 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RunPod LLM"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get started with RunPod LLMs.\n",
    "\n",
    "## Overview\n",
    "\n",
    "This guide covers how to use the LangChain `RunPod` LLM class to interact with text generation models hosted on [RunPod Serverless](https://www.runpod.io/serverless-gpu)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "1. **Install the package:**\n",
    "   ```bash\n",
    "   pip install -qU langchain-runpod\n",
    "   ```\n",
    "2. **Deploy an LLM Endpoint:** Follow the setup steps in the [RunPod Provider Guide](/docs/integrations/providers/runpod#setup) to deploy a compatible text generation endpoint on RunPod Serverless and get its Endpoint ID.\n",
    "3. **Set Environment Variables:** Make sure `RUNPOD_API_KEY` and `RUNPOD_ENDPOINT_ID` are set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "# Make sure environment variables are set (or pass them directly to RunPod)\n",
    "if \"RUNPOD_API_KEY\" not in os.environ:\n",
    "    os.environ[\"RUNPOD_API_KEY\"] = getpass.getpass(\"Enter your RunPod API Key: \")\n",
    "if \"RUNPOD_ENDPOINT_ID\" not in os.environ:\n",
    "    os.environ[\"RUNPOD_ENDPOINT_ID\"] = input(\"Enter your RunPod Endpoint ID: \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Instantiation\n",
    "\n",
    "Initialize the `RunPod` class. You can pass model-specific parameters via `model_kwargs` and configure polling behavior."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_runpod import RunPod\n",
    "\n",
    "llm = RunPod(\n",
    "    # runpod_endpoint_id can be passed here if not set in env\n",
    "    model_kwargs={\n",
    "        \"max_new_tokens\": 256,\n",
    "        \"temperature\": 0.6,\n",
    "        \"top_k\": 50,\n",
    "        # Add other parameters supported by your endpoint handler\n",
    "    },\n",
    "    # Optional: Adjust polling\n",
    "    # poll_interval=0.3,\n",
    "    # max_polling_attempts=100\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Invocation\n",
    "\n",
    "Use the standard LangChain `.invoke()` and `.ainvoke()` methods to call the model. Streaming is also supported via `.stream()` and `.astream()` (simulated by polling the RunPod `/stream` endpoint)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "prompt = \"Write a tagline for an ice cream shop on the moon.\"\n",
    "\n",
    "# Invoke (Sync)\n",
    "try:\n",
    "    response = llm.invoke(prompt)\n",
    "    print(\"--- Sync Invoke Response ---\")\n",
    "    print(response)\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"Error invoking LLM: {e}. Ensure endpoint ID/API key are correct and endpoint is active/compatible.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# Stream (Sync, simulated via polling /stream)\n",
    "print(\"\\n--- Sync Stream Response ---\")\n",
    "try:\n",
    "    for chunk in llm.stream(prompt):\n",
    "        print(chunk, end=\"\", flush=True)\n",
    "    print()  # Newline\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"\\nError streaming LLM: {e}. Ensure endpoint handler supports streaming output format.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Async Usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# AInvoke (Async)\n",
    "try:\n",
    "    async_response = await llm.ainvoke(prompt)\n",
    "    print(\"--- Async Invoke Response ---\")\n",
    "    print(async_response)\n",
    "except Exception as e:\n",
    "    print(f\"Error invoking LLM asynchronously: {e}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# AStream (Async)\n",
    "print(\"\\n--- Async Stream Response ---\")\n",
    "try:\n",
    "    async for chunk in llm.astream(prompt):\n",
    "        print(chunk, end=\"\", flush=True)\n",
    "    print()  # Newline\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"\\nError streaming LLM asynchronously: {e}. Ensure endpoint handler supports streaming output format.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chaining\n",
    "\n",
    "The LLM integrates seamlessly with LangChain Expression Language (LCEL) chains."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "\n",
    "# Assumes 'llm' variable is instantiated from the 'Instantiation' cell\n",
    "prompt_template = PromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
    "parser = StrOutputParser()\n",
    "\n",
    "chain = prompt_template | llm | parser\n",
    "\n",
    "try:\n",
    "    chain_response = chain.invoke({\"topic\": \"bears\"})\n",
    "    print(\"--- Chain Response ---\")\n",
    "    print(chain_response)\n",
    "except Exception as e:\n",
    "    print(f\"Error running chain: {e}\")\n",
    "\n",
    "# Async chain\n",
    "try:\n",
    "    async_chain_response = await chain.ainvoke({\"topic\": \"robots\"})\n",
    "    print(\"--- Async Chain Response ---\")\n",
    "    print(async_chain_response)\n",
    "except Exception as e:\n",
    "    print(f\"Error running async chain: {e}\")"
   ]
  },
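  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LCEL chains also inherit streaming, so the chained output can be consumed chunk by chunk (still simulated via polling, as above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stream a chain's output (simulated, as with llm.stream)\n",
    "try:\n",
    "    for chunk in chain.stream({\"topic\": \"penguins\"}):\n",
    "        print(chunk, end=\"\", flush=True)\n",
    "    print()  # Newline\n",
    "except Exception as e:\n",
    "    print(f\"\\nError streaming chain: {e}\")"
   ]
  },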
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Endpoint Considerations\n",
    "\n",
    "- **Input:** The endpoint handler should expect the prompt string within `{\"input\": {\"prompt\": \"...\", ...}}`.\n",
    "- **Output:** The handler should return the generated text within the `\"output\"` key of the final status response (e.g., `{\"output\": \"Generated text...\"}` or `{\"output\": {\"text\": \"...\"}}`).\n",
    "- **Streaming:** For simulated streaming via the `/stream` endpoint, the handler must populate the `\"stream\"` key in the status response with a list of chunk dictionaries, like `[{\"output\": \"token1\"}, {\"output\": \"token2\"}]`. See the illustrative payload shapes below."
   ]
  },
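  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For illustration only, payloads that satisfy these conventions might look like the following (the exact field names, such as `text`, depend on your handler):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative payload shapes only - nothing is sent anywhere by this cell.\n",
    "example_request = {\"input\": {\"prompt\": \"Hello\", \"max_new_tokens\": 256}}\n",
    "\n",
    "# Final status response with the generated text under \"output\"\n",
    "example_final_status = {\n",
    "    \"id\": \"abc-123\",\n",
    "    \"status\": \"COMPLETED\",\n",
    "    \"output\": {\"text\": \"Hi there!\"},\n",
    "}\n",
    "\n",
    "# In-progress status response carrying streamed chunks\n",
    "example_stream_status = {\n",
    "    \"id\": \"abc-123\",\n",
    "    \"status\": \"IN_PROGRESS\",\n",
    "    \"stream\": [{\"output\": \"Hi\"}, {\"output\": \" there!\"}],\n",
    "}"
   ]
  },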
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of the `RunPod` LLM class, parameters, and methods, refer to the source code or the generated API reference (if available).\n",
    "\n",
    "Link to source code: [https://github.com/runpod/langchain-runpod/blob/main/langchain_runpod/llms.py](https://github.com/runpod/langchain-runpod/blob/main/langchain_runpod/llms.py)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
docs/docs/integrations/providers/runpod.ipynb (new file, 173 lines)
@@ -0,0 +1,173 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RunPod\n",
    "\n",
    "[RunPod](https://www.runpod.io/) provides GPU cloud infrastructure, including Serverless endpoints optimized for deploying and scaling AI models.\n",
    "\n",
    "This guide covers how to use the `langchain-runpod` integration package to connect LangChain applications to models hosted on [RunPod Serverless](https://www.runpod.io/serverless-gpu).\n",
    "\n",
    "The integration offers interfaces for both standard Language Models (LLMs) and Chat Models."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Installation\n",
    "\n",
    "Install the dedicated partner package:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "%pip install -qU langchain-runpod"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "### 1. Deploy an Endpoint on RunPod\n",
    "- Navigate to your [RunPod Serverless Console](https://www.runpod.io/console/serverless/user/endpoints).\n",
    "- Create a \"New Endpoint\", selecting an appropriate GPU and template (e.g., vLLM, TGI, text-generation-webui) compatible with your model and the expected input/output format (see component guides or the package [README](https://github.com/runpod/langchain-runpod)).\n",
    "- Configure settings and deploy.\n",
    "- **Crucially, copy the Endpoint ID** after deployment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Set API Credentials\n",
    "\n",
    "The integration needs your RunPod API Key and the Endpoint ID. Set them as environment variables for secure access:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "os.environ[\"RUNPOD_API_KEY\"] = getpass.getpass(\"Enter your RunPod API Key: \")\n",
    "os.environ[\"RUNPOD_ENDPOINT_ID\"] = input(\"Enter your RunPod Endpoint ID: \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*(Optional)* If using different endpoints for LLM and Chat models, you might need to set `RUNPOD_CHAT_ENDPOINT_ID` or pass the ID directly during initialization, as sketched below."
   ]
  },
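  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of passing endpoint IDs explicitly instead of relying on environment variables (the IDs below are placeholders):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_runpod import ChatRunPod, RunPod\n",
    "\n",
    "# Placeholder IDs - substitute the IDs of your deployed endpoints\n",
    "llm = RunPod(runpod_endpoint_id=\"your-llm-endpoint-id\")\n",
    "chat = ChatRunPod(runpod_endpoint_id=\"your-chat-endpoint-id\")"
   ]
  },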
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Components\n",
    "\n",
    "This package provides two main components:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. LLM\n",
    "\n",
    "For interacting with standard text completion models.\n",
    "\n",
    "See the [RunPod LLM Integration Guide](/docs/integrations/llms/runpod) for detailed usage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_runpod import RunPod\n",
    "\n",
    "# Example initialization (uses environment variables)\n",
    "llm = RunPod(model_kwargs={\"max_new_tokens\": 100})  # Add generation params here\n",
    "\n",
    "# Example Invocation\n",
    "try:\n",
    "    response = llm.invoke(\"Write a short poem about the cloud.\")\n",
    "    print(response)\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"Error invoking LLM: {e}. Ensure endpoint ID and API key are correct and endpoint is active.\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Chat Model\n",
    "\n",
    "For interacting with conversational models.\n",
    "\n",
    "See the [RunPod Chat Model Integration Guide](/docs/integrations/chat/runpod) for detailed usage and feature support."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_core.messages import HumanMessage\n",
    "from langchain_runpod import ChatRunPod\n",
    "\n",
    "# Example initialization (uses environment variables)\n",
    "chat = ChatRunPod(model_kwargs={\"temperature\": 0.8})  # Add generation params here\n",
    "\n",
    "# Example Invocation\n",
    "try:\n",
    "    response = chat.invoke(\n",
    "        [HumanMessage(content=\"Explain RunPod Serverless in one sentence.\")]\n",
    "    )\n",
    "    print(response.content)\n",
    "except Exception as e:\n",
    "    print(\n",
    "        f\"Error invoking Chat Model: {e}. Ensure endpoint ID and API key are correct and endpoint is active.\"\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -106,9 +106,9 @@
     "from langchain_core.tools import Tool\n",
     "\n",
     "# You can create the tool to pass to an agent\n",
-    "repl_tool = Tool(\n",
-    "    name=\"python_repl\",\n",
-    "    description=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n",
+    "custom_tool = Tool(\n",
+    "    name=\"web search\",\n",
+    "    description=\"Search the web for information\",\n",
     "    func=search.run,\n",
     ")"
     ]
@@ -571,3 +571,8 @@ packages:
 - name: langchain-perplexity
   path: libs/partners/perplexity
   repo: langchain-ai/langchain
+- name: langchain-runpod
+  repo: runpod/langchain-runpod
+  path: .
+  name_title: RunPod
+  provider_page: runpod