Mirror of https://github.com/hwchase17/langchain.git (synced 2025-04-27 03:31:51 +00:00)

Merge branch 'master' into agentic-finance

Commit f17a8a9341
@ -60,7 +60,7 @@
|
||||
"id": "CI8Elyc5gBQF"
|
||||
},
|
||||
"source": [
|
||||
"Go to the VertexAI Model Garden on Google Cloud [console](https://pantheon.corp.google.com/vertex-ai/publishers/google/model-garden/335), and deploy the desired version of Gemma to VertexAI. It will take a few minutes, and after the endpoint it ready, you need to copy its number."
|
||||
"Go to the VertexAI Model Garden on Google Cloud [console](https://pantheon.corp.google.com/vertex-ai/publishers/google/model-garden/335), and deploy the desired version of Gemma to VertexAI. It will take a few minutes, and after the endpoint is ready, you need to copy its number."
|
||||
]
|
||||
},
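For orientation, here is a minimal sketch of where that endpoint number ends up (assuming the `GemmaChatVertexAIModelGarden` class from `langchain-google-vertexai`; the project, location, and endpoint values below are placeholders):

```python
# Sketch: wire a Gemma endpoint deployed via Model Garden into LangChain.
# Assumes langchain-google-vertexai is installed and you are authenticated
# to Google Cloud; all identifiers below are placeholders.
from langchain_google_vertexai import GemmaChatVertexAIModelGarden

llm = GemmaChatVertexAIModelGarden(
    endpoint_id="1234567890",   # the endpoint number copied from the console
    project="my-gcp-project",
    location="us-central1",
)
print(llm.invoke("What is the capital of France?"))
```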
|
||||
{
|
||||
|
@ -16,7 +16,7 @@
|
||||
"\n",
|
||||
"Tracking [token](/docs/concepts/tokens/) usage to calculate cost is an important part of putting your app in production. This guide goes over how to obtain this information from your LangChain model calls.\n",
|
||||
"\n",
|
||||
"This guide requires `langchain-anthropic` and `langchain-openai >= 0.1.9`."
|
||||
"This guide requires `langchain-anthropic` and `langchain-openai >= 0.3.11`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -38,19 +38,9 @@
|
||||
"\n",
|
||||
"OpenAI's Chat Completions API does not stream token usage statistics by default (see API reference\n",
|
||||
"[here](https://platform.openai.com/docs/api-reference/completions/create#completions-create-stream_options)).\n",
|
||||
"To recover token counts when streaming with `ChatOpenAI`, set `stream_usage=True` as\n",
|
||||
"To recover token counts when streaming with `ChatOpenAI` or `AzureChatOpenAI`, set `stream_usage=True` as\n",
|
||||
"demonstrated in this guide.\n",
|
||||
"\n",
|
||||
"For `AzureChatOpenAI`, set `stream_options={\"include_usage\": True}` when calling\n",
|
||||
"`.(a)stream`, or initialize with:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"AzureChatOpenAI(\n",
|
||||
" ...,\n",
|
||||
" model_kwargs={\"stream_options\": {\"include_usage\": True}},\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
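To make the callout above concrete, here is a minimal sketch (assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set) of recovering token counts from a stream:

```python
# Sketch: recover token usage while streaming with ChatOpenAI.
# Assumes OPENAI_API_KEY is set; the model name is illustrative.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True)

aggregate = None
for chunk in llm.stream("hello"):
    # Summing chunks merges their content and usage metadata.
    aggregate = chunk if aggregate is None else aggregate + chunk

print(aggregate.usage_metadata)
```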
|
||||
@ -67,7 +57,7 @@
|
||||
"\n",
|
||||
"A number of model providers return token usage information as part of the chat generation response. When available, this information will be included on the `AIMessage` objects produced by the corresponding model.\n",
|
||||
"\n",
|
||||
"LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n",
|
||||
"LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`). They will also include information on cached token usage and tokens from multi-modal data.\n",
|
||||
"\n",
|
||||
"Examples:\n",
|
||||
"\n",
|
||||
@ -92,9 +82,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"llm = init_chat_model(model=\"gpt-4o-mini\")\n",
|
||||
"openai_response = llm.invoke(\"hello\")\n",
|
||||
"openai_response.usage_metadata"
|
||||
]
|
||||
@ -132,37 +122,6 @@
|
||||
"anthropic_response.usage_metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d4efc15-ba9f-4b3d-9278-8e01f99f263f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using AIMessage.response_metadata\n",
|
||||
"\n",
|
||||
"Metadata from the model response is also included in the AIMessage [response_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. Note that different providers adopt different conventions for representing token counts:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f156f9da-21f2-4c81-a714-54cbf9ad393e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI: {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}\n",
|
||||
"\n",
|
||||
"Anthropic: {'input_tokens': 8, 'output_tokens': 12}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f'OpenAI: {openai_response.response_metadata[\"token_usage\"]}\\n')\n",
|
||||
"print(f'Anthropic: {anthropic_response.response_metadata[\"usage\"]}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4ef2c43-0ff6-49eb-9782-e4070c9da8d7",
|
||||
@ -207,7 +166,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"llm = init_chat_model(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"aggregate = None\n",
|
||||
"for chunk in llm.stream(\"hello\", stream_usage=True):\n",
|
||||
@ -318,7 +277,7 @@
|
||||
" punchline: str = Field(description=\"answer to resolve the joke\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
"llm = init_chat_model(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" stream_usage=True,\n",
|
||||
")\n",
|
||||
@ -326,10 +285,10 @@
|
||||
"# chat model and appends a parser.\n",
|
||||
"structured_llm = llm.with_structured_output(Joke)\n",
|
||||
"\n",
|
||||
"async for event in structured_llm.astream_events(\"Tell me a joke\", version=\"v2\"):\n",
|
||||
"async for event in structured_llm.astream_events(\"Tell me a joke\"):\n",
|
||||
" if event[\"event\"] == \"on_chat_model_end\":\n",
|
||||
" print(f'Token usage: {event[\"data\"][\"output\"].usage_metadata}\\n')\n",
|
||||
" elif event[\"event\"] == \"on_chain_end\":\n",
|
||||
" elif event[\"event\"] == \"on_chain_end\" and event[\"name\"] == \"RunnableSequence\":\n",
|
||||
" print(event[\"data\"][\"output\"])\n",
|
||||
" else:\n",
|
||||
" pass"
|
||||
@ -350,17 +309,18 @@
|
||||
"source": [
|
||||
"## Using callbacks\n",
|
||||
"\n",
|
||||
"There are also some API-specific callback context managers that allow you to track token usage across multiple calls. They are currently only implemented for the OpenAI API and Bedrock Anthropic API, and are available in `langchain-community`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64e52d21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
":::info Requires ``langchain-core>=0.3.49``\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"LangChain implements a callback handler and context manager that will track token usage across calls of any chat model that returns `usage_metadata`.\n",
|
||||
"\n",
|
||||
"There are also some API-specific callback context managers that maintain pricing for different models, allowing for cost estimation in real time. They are currently only implemented for the OpenAI API and Bedrock Anthropic API, and are available in `langchain-community`:\n",
|
||||
"\n",
|
||||
"- [get_openai_callback](https://python.langchain.com/api_reference/community/callbacks/langchain_community.callbacks.manager.get_openai_callback.html)\n",
|
||||
"- [get_bedrock_anthropic_callback](https://python.langchain.com/api_reference/community/callbacks/langchain_community.callbacks.manager.get_bedrock_anthropic_callback.html)\n",
|
||||
"\n",
|
||||
"Below, we demonstrate the general-purpose usage metadata callback manager. We can track token usage through configuration or as a context manager."
|
||||
]
|
||||
},
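For reference, a short sketch of the provider-specific manager mentioned above (assuming `langchain-community` and `langchain-openai` are installed and `OPENAI_API_KEY` is set):

```python
# Sketch: real-time cost estimation with the OpenAI-specific callback manager.
# Assumes langchain-community is installed and OPENAI_API_KEY is set.
from langchain_community.callbacks.manager import get_openai_callback
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

with get_openai_callback() as cb:
    llm.invoke("Tell me a joke")
    llm.invoke("Tell me another joke")  # usage accumulates across calls

print(f"Total tokens: {cb.total_tokens}")
print(f"Total cost (USD): ${cb.total_cost}")
```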
|
||||
{
|
||||
@ -368,41 +328,84 @@
|
||||
"id": "6f043cb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### OpenAI\n",
|
||||
"### Tracking token usage through configuration\n",
|
||||
"\n",
|
||||
"Let's first look at an extremely simple example of tracking token usage for a single Chat model call."
|
||||
"To track token usage through configuration, instantiate a `UsageMetadataCallbackHandler` and pass it into the config:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 17,
|
||||
"id": "b04a4486-72fd-48ce-8f9e-5d281b441195",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'gpt-4o-mini-2024-07-18': {'input_tokens': 8,\n",
|
||||
" 'output_tokens': 10,\n",
|
||||
" 'total_tokens': 18,\n",
|
||||
" 'input_token_details': {'audio': 0, 'cache_read': 0},\n",
|
||||
" 'output_token_details': {'audio': 0, 'reasoning': 0}},\n",
|
||||
" 'claude-3-5-haiku-20241022': {'input_tokens': 8,\n",
|
||||
" 'output_tokens': 21,\n",
|
||||
" 'total_tokens': 29,\n",
|
||||
" 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langchain_core.callbacks import UsageMetadataCallbackHandler\n",
|
||||
"\n",
|
||||
"llm_1 = init_chat_model(model=\"openai:gpt-4o-mini\")\n",
|
||||
"llm_2 = init_chat_model(model=\"anthropic:claude-3-5-haiku-latest\")\n",
|
||||
"\n",
|
||||
"callback = UsageMetadataCallbackHandler()\n",
|
||||
"result_1 = llm_1.invoke(\"Hello\", config={\"callbacks\": [callback]})\n",
|
||||
"result_2 = llm_2.invoke(\"Hello\", config={\"callbacks\": [callback]})\n",
|
||||
"callback.usage_metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7a290085-e541-4233-afe4-637ec5032bfd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Tracking token usage using a context manager\n",
|
||||
"\n",
|
||||
"You can also use `get_usage_metadata_callback` to create a context manager and aggregate usage metadata there:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "4728f55a-24e1-48cd-a195-09d037821b1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 27\n",
|
||||
"\tPrompt Tokens: 11\n",
|
||||
"\tCompletion Tokens: 16\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $2.95e-05\n"
|
||||
"{'gpt-4o-mini-2024-07-18': {'input_tokens': 8, 'output_tokens': 10, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}, 'claude-3-5-haiku-20241022': {'input_tokens': 8, 'output_tokens': 21, 'total_tokens': 29, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.manager import get_openai_callback\n",
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langchain_core.callbacks import get_usage_metadata_callback\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" temperature=0,\n",
|
||||
" stream_usage=True,\n",
|
||||
")\n",
|
||||
"llm_1 = init_chat_model(model=\"openai:gpt-4o-mini\")\n",
|
||||
"llm_2 = init_chat_model(model=\"anthropic:claude-3-5-haiku-latest\")\n",
|
||||
"\n",
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
"with get_usage_metadata_callback() as cb:\n",
|
||||
" llm_1.invoke(\"Hello\")\n",
|
||||
" llm_2.invoke(\"Hello\")\n",
|
||||
" print(cb.usage_metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -410,61 +413,7 @@
|
||||
"id": "c0ab6d27",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Anything inside the context manager will get tracked. Here's an example of using it to track multiple calls in sequence."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "05f22a1d-b021-490f-8840-f628a07459f2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"54\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb.total_tokens)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "c00c9158-7bb4-4279-88e6-ea70f46e6ac2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 27\n",
|
||||
"\tPrompt Tokens: 11\n",
|
||||
"\tCompletion Tokens: 16\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $2.95e-05\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" for chunk in llm.stream(\"Tell me a joke\"):\n",
|
||||
" pass\n",
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8186e7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If a chain or agent with multiple steps in it is used, it will track all those steps."
|
||||
"Either of these methods will aggregate token usage across multiple calls to each model. For example, you can use it in an [agent](https://python.langchain.com/docs/concepts/agents/) to track token usage across repeated calls to one model:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -474,138 +423,63 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain langchain-aws wikipedia"
|
||||
"%pip install -qU langgraph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "5d1125c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentExecutor, create_tool_calling_agent, load_tools\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're a helpful assistant\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" (\"placeholder\", \"{agent_scratchpad}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"tools = load_tools([\"wikipedia\"])\n",
|
||||
"agent = create_tool_calling_agent(llm, tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "3950d88b-8bfb-4294-b75b-e6fd421e633c",
|
||||
"execution_count": 20,
|
||||
"id": "fe945078-ee2d-43ba-8cdf-afb2f2f4ecef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"What's the weather in Boston?\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" get_weather (call_izMdhUYpp9Vhx7DTNAiybzGa)\n",
|
||||
" Call ID: call_izMdhUYpp9Vhx7DTNAiybzGa\n",
|
||||
" Args:\n",
|
||||
" location: Boston\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"Name: get_weather\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `wikipedia` with `{'query': 'hummingbird scientific name'}`\n",
|
||||
"It's sunny.\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"The weather in Boston is sunny.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mPage: Hummingbird\n",
|
||||
"Summary: Hummingbirds are birds native to the Americas and comprise the biological family Trochilidae. With approximately 366 species and 113 genera, they occur from Alaska to Tierra del Fuego, but most species are found in Central and South America. As of 2024, 21 hummingbird species are listed as endangered or critically endangered, with numerous species declining in population.\n",
|
||||
"Hummingbirds have varied specialized characteristics to enable rapid, maneuverable flight: exceptional metabolic capacity, adaptations to high altitude, sensitive visual and communication abilities, and long-distance migration in some species. Among all birds, male hummingbirds have the widest diversity of plumage color, particularly in blues, greens, and purples. Hummingbirds are the smallest mature birds, measuring 7.5–13 cm (3–5 in) in length. The smallest is the 5 cm (2.0 in) bee hummingbird, which weighs less than 2.0 g (0.07 oz), and the largest is the 23 cm (9 in) giant hummingbird, weighing 18–24 grams (0.63–0.85 oz). Noted for long beaks, hummingbirds are specialized for feeding on flower nectar, but all species also consume small insects.\n",
|
||||
"They are known as hummingbirds because of the humming sound created by their beating wings, which flap at high frequencies audible to other birds and humans. They hover at rapid wing-flapping rates, which vary from around 12 beats per second in the largest species to 80 per second in small hummingbirds.\n",
|
||||
"Hummingbirds have the highest mass-specific metabolic rate of any homeothermic animal. To conserve energy when food is scarce and at night when not foraging, they can enter torpor, a state similar to hibernation, and slow their metabolic rate to 1⁄15 of its normal rate. While most hummingbirds do not migrate, the rufous hummingbird has one of the longest migrations among birds, traveling twice per year between Alaska and Mexico, a distance of about 3,900 miles (6,300 km).\n",
|
||||
"Hummingbirds split from their sister group, the swifts and treeswifts, around 42 million years ago. The oldest known fossil hummingbird is Eurotrochilus, from the Rupelian Stage of Early Oligocene Europe.\n",
|
||||
"\n",
|
||||
"Page: Rufous hummingbird\n",
|
||||
"Summary: The rufous hummingbird (Selasphorus rufus) is a small hummingbird, about 8 cm (3.1 in) long with a long, straight and slender bill. These birds are known for their extraordinary flight skills, flying 2,000 mi (3,200 km) during their migratory transits. It is one of nine species in the genus Selasphorus.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Page: Allen's hummingbird\n",
|
||||
"Summary: Allen's hummingbird (Selasphorus sasin) is a species of hummingbird that breeds in the western United States. It is one of seven species in the genus Selasphorus.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `wikipedia` with `{'query': 'fastest bird species'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mPage: List of birds by flight speed\n",
|
||||
"Summary: This is a list of the fastest flying birds in the world. A bird's velocity is necessarily variable; a hunting bird will reach much greater speeds while diving to catch prey than when flying horizontally. The bird that can achieve the greatest airspeed is the peregrine falcon (Falco peregrinus), able to exceed 320 km/h (200 mph) in its dives. A close relative of the common swift, the white-throated needletail (Hirundapus caudacutus), is commonly reported as the fastest bird in level flight with a reported top speed of 169 km/h (105 mph). This record remains unconfirmed as the measurement methods have never been published or verified. The record for the fastest confirmed level flight by a bird is 111.5 km/h (69.3 mph) held by the common swift.\n",
|
||||
"\n",
|
||||
"Page: Fastest animals\n",
|
||||
"Summary: This is a list of the fastest animals in the world, by types of animal.\n",
|
||||
"\n",
|
||||
"Page: Falcon\n",
|
||||
"Summary: Falcons () are birds of prey in the genus Falco, which includes about 40 species. Falcons are widely distributed on all continents of the world except Antarctica, though closely related raptors did occur there in the Eocene.\n",
|
||||
"Adult falcons have thin, tapered wings, which enable them to fly at high speed and change direction rapidly. Fledgling falcons, in their first year of flying, have longer flight feathers, which make their configuration more like that of a general-purpose bird such as a broad wing. This makes flying easier while learning the exceptional skills required to be effective hunters as adults.\n",
|
||||
"The falcons are the largest genus in the Falconinae subfamily of Falconidae, which itself also includes another subfamily comprising caracaras and a few other species. All these birds kill with their beaks, using a tomial \"tooth\" on the side of their beaks—unlike the hawks, eagles, and other birds of prey in the Accipitridae, which use their feet.\n",
|
||||
"The largest falcon is the gyrfalcon at up to 65 cm in length. The smallest falcon species is the pygmy falcon, which measures just 20 cm. As with hawks and owls, falcons exhibit sexual dimorphism, with the females typically larger than the males, thus allowing a wider range of prey species.\n",
|
||||
"Some small falcons with long, narrow wings are called \"hobbies\" and some which hover while hunting are called \"kestrels\".\n",
|
||||
"As is the case with many birds of prey, falcons have exceptional powers of vision; the visual acuity of one species has been measured at 2.6 times that of a normal human. Peregrine falcons have been recorded diving at speeds of 320 km/h (200 mph), making them the fastest-moving creatures on Earth; the fastest recorded dive attained a vertical speed of 390 km/h (240 mph).\u001b[0m\u001b[32;1m\u001b[1;3mThe scientific name for a hummingbird is Trochilidae. The fastest bird species in level flight is the common swift, which holds the record for the fastest confirmed level flight by a bird at 111.5 km/h (69.3 mph). The peregrine falcon is known to exceed speeds of 320 km/h (200 mph) in its dives, making it the fastest bird in terms of diving speed.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Total Tokens: 1675\n",
|
||||
"Prompt Tokens: 1538\n",
|
||||
"Completion Tokens: 137\n",
|
||||
"Total Cost (USD): $0.0009745000000000001\n"
|
||||
"Total usage: {'gpt-4o-mini-2024-07-18': {'input_token_details': {'audio': 0, 'cache_read': 0}, 'input_tokens': 125, 'total_tokens': 149, 'output_tokens': 24, 'output_token_details': {'audio': 0, 'reasoning': 0}}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" response = agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"What's a hummingbird's scientific name and what's the fastest bird species?\"\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
" print(f\"Total Tokens: {cb.total_tokens}\")\n",
|
||||
" print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n",
|
||||
" print(f\"Completion Tokens: {cb.completion_tokens}\")\n",
|
||||
" print(f\"Total Cost (USD): ${cb.total_cost}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ebc9122b-050b-4006-b763-264b0b26d9df",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Bedrock Anthropic\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"The `get_bedrock_anthropic_callback` works very similarly:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "1837c807-136a-49d8-9c33-060e58dc16d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 96\n",
|
||||
"\tPrompt Tokens: 26\n",
|
||||
"\tCompletion Tokens: 70\n",
|
||||
"Successful Requests: 2\n",
|
||||
"Total Cost (USD): $0.001888\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_aws import ChatBedrock\n",
|
||||
"from langchain_community.callbacks.manager import get_bedrock_anthropic_callback\n",
|
||||
"\n",
|
||||
"llm = ChatBedrock(model_id=\"anthropic.claude-v2\")\n",
|
||||
"# Create a tool\n",
|
||||
"def get_weather(location: str) -> str:\n",
|
||||
" \"\"\"Get the weather at a location.\"\"\"\n",
|
||||
" return \"It's sunny.\"\n",
|
||||
"\n",
|
||||
"with get_bedrock_anthropic_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
"\n",
|
||||
"callback = UsageMetadataCallbackHandler()\n",
|
||||
"\n",
|
||||
"tools = [get_weather]\n",
|
||||
"agent = create_react_agent(\"openai:gpt-4o-mini\", tools)\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": [{\"role\": \"user\", \"content\": \"What's the weather in Boston?\"}]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
" config={\"callbacks\": [callback]},\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(f\"\\nTotal usage: {callback.usage_metadata}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -247,6 +247,7 @@
|
||||
" additional_kwargs={}, # Used to add additional payload to the message\n",
|
||||
" response_metadata={ # Use for response metadata\n",
|
||||
" \"time_in_seconds\": 3,\n",
|
||||
" \"model_name\": self.model_name,\n",
|
||||
" },\n",
|
||||
" usage_metadata={\n",
|
||||
" \"input_tokens\": ct_input_tokens,\n",
|
||||
@ -309,7 +310,10 @@
|
||||
"\n",
|
||||
" # Let's add some other information (e.g., response metadata)\n",
|
||||
" chunk = ChatGenerationChunk(\n",
|
||||
" message=AIMessageChunk(content=\"\", response_metadata={\"time_in_sec\": 3})\n",
|
||||
" message=AIMessageChunk(\n",
|
||||
" content=\"\",\n",
|
||||
" response_metadata={\"time_in_sec\": 3, \"model_name\": self.model_name},\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" if run_manager:\n",
|
||||
" # This is optional in newer versions of LangChain\n",
|
||||
|
@ -5,21 +5,38 @@
|
||||
"id": "134a0785",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vectara Chat\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n",
|
||||
"\n",
|
||||
"Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n",
|
||||
"1. A way to extract text from files (PDF, PPT, DOCX, etc)\n",
|
||||
"2. ML-based chunking that provides state of the art performance.\n",
|
||||
"3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n",
|
||||
"4. Its own internal vector database where text chunks and embedding vectors are stored.\n",
|
||||
"5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments (including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n",
|
||||
"5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n",
|
||||
"6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n",
|
||||
"\n",
|
||||
"See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n",
|
||||
"For more information:\n",
|
||||
"- [Documentation](https://docs.vectara.com/docs/)\n",
|
||||
"- [API Playground](https://docs.vectara.com/docs/rest-api/)\n",
|
||||
"- [Quickstart](https://docs.vectara.com/docs/quickstart)\n",
|
||||
"\n",
|
||||
"This notebook shows how to use Vectara's [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) functionality, which provides automatic storage of conversation history and ensures follow up questions consider that history."
|
||||
"\n",
|
||||
"This notebook shows how to use Vectara's [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) functionality, which provides automatic storage of conversation history and ensures follow up questions consider that history.\n",
|
||||
"\n",
|
||||
"### Setup\n",
|
||||
"\n",
|
||||
"To use the `VectaraVectorStore` you first need to install the partner package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4a2f525-4805-4880-8bfa-18fe6f1cd1c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!uv pip install -U pip && uv pip install -qU langchain-vectara"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -27,17 +44,19 @@
|
||||
"id": "56372c5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Getting Started\n",
|
||||
"## Getting Started\n",
|
||||
"\n",
|
||||
"To get started, use the following steps:\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n",
|
||||
"2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n",
|
||||
"3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n",
|
||||
"\n",
|
||||
"To use LangChain with Vectara, you'll need to have these three values: `customer ID`, `corpus ID` and `api_key`.\n",
|
||||
"You can provide those to LangChain in two ways:\n",
|
||||
"To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n",
|
||||
"You can provide `VECTARA_API_KEY` to LangChain in two ways:\n",
|
||||
"\n",
|
||||
"1. Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n",
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"1. Include in your environment these two variables: `VECTARA_API_KEY`.\n",
|
||||
"\n",
|
||||
" For example, you can set these variables using os.environ and getpass as follows:\n",
|
||||
"\n",
|
||||
@ -45,8 +64,6 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
@ -54,17 +71,16 @@
|
||||
"\n",
|
||||
"```python\n",
|
||||
"vectara = Vectara(\n",
|
||||
" vectara_customer_id=vectara_customer_id,\n",
|
||||
" vectara_corpus_id=vectara_corpus_id,\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
" )\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"In this notebook we assume they are provided in the environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "70c4e529",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -73,14 +89,15 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<YOUR_VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = \"<YOUR_VECTARA_CORPUS_ID>\"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = \"<YOUR_VECTARA_CUSTOMER_ID>\"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_KEY\"] = \"<VECTARA_CORPUS_KEY>\"\n",
|
||||
"\n",
|
||||
"from langchain_community.vectorstores import Vectara\n",
|
||||
"from langchain_community.vectorstores.vectara import (\n",
|
||||
" RerankConfig,\n",
|
||||
" SummaryConfig,\n",
|
||||
"from langchain_vectara import Vectara\n",
|
||||
"from langchain_vectara.vectorstores import (\n",
|
||||
" CorpusConfig,\n",
|
||||
" GenerationConfig,\n",
|
||||
" MmrReranker,\n",
|
||||
" SearchConfig,\n",
|
||||
" VectaraQueryConfig,\n",
|
||||
")"
|
||||
]
|
||||
@ -101,7 +118,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "01c46e92",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -110,10 +127,11 @@
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"state_of_the_union.txt\")\n",
|
||||
"loader = TextLoader(\"../document_loaders/example_data/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"vectara = Vectara.from_documents(documents, embedding=None)"
|
||||
"corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n",
|
||||
"vectara = Vectara.from_documents(documents, embedding=None, corpus_key=corpus_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -126,18 +144,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "1b41a10b-bf68-4689-8f00-9aed7675e2ab",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang=\"eng\")\n",
|
||||
"rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.2)\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n",
|
||||
"generation_config = GenerationConfig(\n",
|
||||
" max_used_search_results=7,\n",
|
||||
" response_language=\"eng\",\n",
|
||||
" generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n",
|
||||
" enable_factual_consistency_score=True,\n",
|
||||
")\n",
|
||||
"search_config = SearchConfig(\n",
|
||||
" corpora=[CorpusConfig(corpus_key=corpus_key, limit=25)],\n",
|
||||
" reranker=MmrReranker(diversity_bias=0.2),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" search=search_config,\n",
|
||||
" generation=generation_config,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"bot = vectara.as_chat(config)"
|
||||
]
|
||||
@ -147,12 +176,15 @@
|
||||
"id": "83f38c18-ac82-45f4-a79e-8b37ce1ae115",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"Here's an example of asking a question with no chat history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "bc672290-8a8b-4828-a90c-f1bbdd6b3920",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -161,10 +193,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The President expressed gratitude to Justice Breyer and highlighted the significance of nominating Ketanji Brown Jackson to the Supreme Court, praising her legal expertise and commitment to upholding excellence [1]. The President also reassured the public about the situation with gas prices and the conflict in Ukraine, emphasizing unity with allies and the belief that the world will emerge stronger from these challenges [2][4]. Additionally, the President shared personal experiences related to economic struggles and the importance of passing the American Rescue Plan to support those in need [3]. The focus was also on job creation and economic growth, acknowledging the impact of inflation on families [5]. While addressing cancer as a significant issue, the President discussed plans to enhance cancer research and support for patients and families [7].'"
|
||||
"'The president stated that nominating someone to serve on the United States Supreme Court is one of the most serious constitutional responsibilities. He nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, describing her as one of the nation’s top legal minds who will continue Justice Breyer’s legacy of excellence and noting her experience as a former top litigator in private practice [1].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -183,7 +215,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "9c95460b-7116-4155-a9d2-c0fb027ee592",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -192,10 +224,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"In his remarks, the President specified that Ketanji Brown Jackson is succeeding Justice Breyer on the United States Supreme Court[1]. The President praised Jackson as a top legal mind who will continue Justice Breyer's legacy of excellence. The nomination of Jackson was highlighted as a significant constitutional responsibility of the President[1]. The President emphasized the importance of this nomination and the qualities that Jackson brings to the role. The focus was on the transition from Justice Breyer to Judge Ketanji Brown Jackson on the Supreme Court[1].\""
|
||||
"'Yes, the president mentioned that Ketanji Brown Jackson succeeded Justice Breyer [1].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -217,7 +249,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "936dc62f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -227,14 +259,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Judge Ketanji Brown Jackson is a nominee for the United States Supreme Court, known for her legal expertise and experience as a former litigator. She is praised for her potential to continue the legacy of excellence on the Court[1]. While the search results provide information on various topics like innovation, economic growth, and healthcare initiatives, they do not directly address Judge Ketanji Brown Jackson's specific accomplishments. Therefore, I do not have enough information to answer this question."
|
||||
"The president acknowledged the significant impact of COVID-19 on the nation, expressing understanding of the public's fatigue and frustration. He emphasized the need to view COVID-19 not as a partisan issue but as a serious disease, urging unity among Americans. The president highlighted the progress made, noting that severe cases have decreased significantly, and mentioned new CDC guidelines allowing most Americans to be mask-free. He also pointed out the efforts to vaccinate the nation and provide economic relief, and the ongoing commitment to vaccinate the world [2], [3], [5]."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = {}\n",
|
||||
"curr_key = None\n",
|
||||
"for chunk in bot.stream(\"what about her accopmlishments?\"):\n",
|
||||
"for chunk in bot.stream(\"what did he said about the covid?\"):\n",
|
||||
" for key in chunk:\n",
|
||||
" if key not in output:\n",
|
||||
" output[key] = chunk[key]\n",
|
||||
@ -244,6 +276,83 @@
|
||||
" print(chunk[key], end=\"\", flush=True)\n",
|
||||
" curr_key = key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cefdf72b1d90085a",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"For additional capabilities you can use chaining."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "167bc806-395e-46bf-80cc-3c5d43164f42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"So, the president talked about how the COVID-19 sickness has affected a lot of people in the country. He said that it's important for everyone to work together to fight the sickness, no matter what political party they are in. The president also mentioned that they are working hard to give vaccines to people to help protect them from getting sick. They are also giving money and help to people who need it, like food, housing, and cheaper health insurance. The president also said that they are sending vaccines to many other countries to help people all around the world stay healthy.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that explains the stuff to a five year old. Vectara is providing the answer.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{vectara_response}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_vectara_response(question: dict) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Calls Vectara as_chat and returns the answer string. This encapsulates\n",
|
||||
" the Vectara call.\n",
|
||||
" \"\"\"\n",
|
||||
" try:\n",
|
||||
" response = bot.invoke(question[\"question\"])\n",
|
||||
" return response[\"answer\"]\n",
|
||||
" except Exception as e:\n",
|
||||
" return \"I'm sorry, I couldn't get an answer from Vectara.\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Create the chain\n",
|
||||
"chain = get_vectara_response | prompt | llm | StrOutputParser()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Invoke the chain\n",
|
||||
"result = chain.invoke({\"question\": \"what did he say about the covid?\"})\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b8bb761-db4a-436c-8939-41e9f8652083",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"You can look at the [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) documentation for the details."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -262,7 +371,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
@ -31,7 +31,7 @@
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use PyMuPDFLoader"
|
||||
"No credentials are required to use PDFMinerLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -60,7 +60,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **pymupdf**."
|
||||
"Install **langchain_community** and **pdfminer**."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -327,7 +327,7 @@
|
||||
"- By page\n",
|
||||
"- As a single text flow\n",
|
||||
"\n",
|
||||
"By default PDFPlumberLoader will split the PDF by page."
|
||||
"By default PDFMinerLoader will split the PDF by page."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -289,7 +289,7 @@
|
||||
"- By page\n",
|
||||
"- As a single text flow\n",
|
||||
"\n",
|
||||
"By default PDFPlumberLoader will split the PDF by page."
|
||||
"By default PyMuPDFLoader will split the PDF by page."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -38,7 +38,7 @@
|
||||
"To use LangChain, install and import all the necessary packages. We'll use the package manager [pip](https://pip.pypa.io/en/stable/installation/), along with the `--user` flag, to ensure proper permissions. If you've installed Python 3.4 or a later version, `pip` is included by default. You can install all the required packages using the following command:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip install langchain langchain-openai neo4j --user\n",
|
||||
"pip install langchain langchain-openai langchain-memgraph --user\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can either run the provided code blocks in this notebook or use a separate Python file to experiment with Memgraph and LangChain."
|
||||
@ -57,24 +57,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_community.chains.graph_qa.memgraph import MemgraphQAChain\n",
|
||||
"from langchain_community.graphs import MemgraphGraph\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"from langchain_memgraph.chains.graph_qa import MemgraphQAChain\n",
|
||||
"from langchain_memgraph.graphs.memgraph import Memgraph\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"url = os.environ.get(\"MEMGRAPH_URI\", \"bolt://localhost:7687\")\n",
|
||||
"username = os.environ.get(\"MEMGRAPH_USERNAME\", \"\")\n",
|
||||
"password = os.environ.get(\"MEMGRAPH_PASSWORD\", \"\")\n",
|
||||
"\n",
|
||||
"graph = MemgraphGraph(\n",
|
||||
" url=url, username=username, password=password, refresh_schema=False\n",
|
||||
")"
|
||||
"graph = Memgraph(url=url, username=username, password=password, refresh_schema=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
docs/docs/integrations/providers/memgraph.mdx (new file, 40 lines)
@ -0,0 +1,40 @@
|
||||
# Memgraph
|
||||
|
||||
>Memgraph is a high-performance, in-memory graph database that is optimized for real-time queries and analytics.
|
||||
>Get started with Memgraph by visiting [their website](https://memgraph.com/).
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python SDK with `pip install langchain-memgraph`
|
||||
|
||||
## MemgraphQAChain
|
||||
|
||||
There exists a wrapper around the Memgraph graph database that allows you to generate Cypher statements based on the user input
|
||||
and use them to retrieve relevant information from the database.
|
||||
|
||||
```python
|
||||
from langchain_memgraph.chains.graph_qa import MemgraphQAChain
|
||||
from langchain_memgraph.graphs.memgraph import Memgraph
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/graphs/memgraph)
|
||||
|
||||
## Constructing a Knowledge Graph from unstructured data
|
||||
|
||||
You can use the integration to construct a knowledge graph from unstructured data.
|
||||
|
||||
```python
|
||||
from langchain_memgraph.graphs.memgraph import Memgraph
|
||||
from langchain_experimental.graph_transformers import LLMGraphTransformer
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/graphs/memgraph)
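A sketch of that flow (assuming `langchain-experimental` and `langchain-openai` are installed, `OPENAI_API_KEY` is set, and that `add_graph_documents` behaves as in the community graph stores):

```python
# Sketch: derive a knowledge graph from unstructured text and store it in Memgraph.
# add_graph_documents is assumed to behave as in the community graph stores.
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_memgraph.graphs.memgraph import Memgraph
from langchain_openai import ChatOpenAI

graph = Memgraph(url="bolt://localhost:7687", username="", password="")
transformer = LLMGraphTransformer(llm=ChatOpenAI(temperature=0, model="gpt-4o-mini"))

docs = [Document(page_content="Charlie Chaplin directed and starred in Modern Times.")]
graph_documents = transformer.convert_to_graph_documents(docs)
graph.add_graph_documents(graph_documents)
```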
|
||||
|
||||
## Memgraph Tools and Toolkit
|
||||
|
||||
Memgraph also provides a toolkit that allows you to interact with the Memgraph database.
|
||||
See a [usage example](/docs/integrations/tools/memgraph).
|
||||
|
||||
```python
|
||||
from langchain_memgraph import MemgraphToolkit
|
||||
```
|
docs/docs/integrations/providers/vectara.ipynb (new file, 348 lines)
@ -0,0 +1,348 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "559f8e0e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vectara\n",
|
||||
"\n",
|
||||
"[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n",
|
||||
"Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n",
|
||||
"1. A way to extract text from files (PDF, PPT, DOCX, etc)\n",
|
||||
"2. ML-based chunking that provides state of the art performance.\n",
|
||||
"3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n",
|
||||
"4. Its own internal vector database where text chunks and embedding vectors are stored.\n",
|
||||
"5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n",
|
||||
"6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n",
|
||||
"\n",
|
||||
"For more information:\n",
|
||||
"- [Documentation](https://docs.vectara.com/docs/)\n",
|
||||
"- [API Playground](https://docs.vectara.com/docs/rest-api/)\n",
|
||||
"- [Quickstart](https://docs.vectara.com/docs/quickstart)\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the basic retrieval functionality, when utilizing Vectara just as a Vector Store (without summarization), incuding: `similarity_search` and `similarity_search_with_score` as well as using the LangChain `as_retriever` functionality.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `VectaraVectorStore` you first need to install the partner package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfdf03ba-d6f5-4b1e-86d3-a65c4bc99aa1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!uv pip install -U pip && uv pip install -qU langchain-vectara"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e97dcf11",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Getting Started\n",
|
||||
"\n",
|
||||
"To get started, use the following steps:\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n",
|
||||
"2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n",
|
||||
"3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n",
|
||||
"\n",
|
||||
"To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n",
|
||||
"You can provide `VECTARA_API_KEY` to LangChain in two ways:\n",
|
||||
"\n",
|
||||
"1. Include in your environment these two variables: `VECTARA_API_KEY`.\n",
|
||||
"\n",
|
||||
" For example, you can set these variables using os.environ and getpass as follows:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"2. Add them to the `Vectara` vectorstore constructor:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"vectara = Vectara(\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"In this notebook we assume they are provided in the environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "aac7a9a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_KEY\"] = \"VECTARA_CORPUS_KEY\"\n",
|
||||
"\n",
|
||||
"from langchain_vectara import Vectara\n",
|
||||
"from langchain_vectara.vectorstores import (\n",
|
||||
" ChainReranker,\n",
|
||||
" CorpusConfig,\n",
|
||||
" CustomerSpecificReranker,\n",
|
||||
" File,\n",
|
||||
" GenerationConfig,\n",
|
||||
" MmrReranker,\n",
|
||||
" SearchConfig,\n",
|
||||
" VectaraQueryConfig,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vectara = Vectara(vectara_api_key=os.getenv(\"VECTARA_API_KEY\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "875ffb7e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we load the state-of-the-union text into Vectara.\n",
|
||||
"\n",
|
||||
"Note that we use the add_files interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n",
|
||||
"\n",
|
||||
"In this case it uses a .txt file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "be0a4973",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['state_of_the_union.txt']"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n",
|
||||
"file_obj = File(\n",
|
||||
" file_path=\"../document_loaders/example_data/state_of_the_union.txt\",\n",
|
||||
" metadata={\"source\": \"text_file\"},\n",
|
||||
")\n",
|
||||
"vectara.add_files([file_obj], corpus_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22a6b953",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Vectara RAG (retrieval augmented generation)\n",
|
||||
"\n",
|
||||
"We now create a `VectaraQueryConfig` object to control the retrieval and summarization options:\n",
|
||||
"* We enable summarization, specifying we would like the LLM to pick the top 7 matching chunks and respond in English\n",
|
||||
"\n",
|
||||
"Using this configuration, let's create a LangChain `Runnable` object that encpasulates the full Vectara RAG pipeline, using the `as_rag` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "9ecda054-96a8-4a91-aeae-32006efb1ac8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russia, including preventing its central bank from defending the Ruble and targeting Russian oligarchs' assets, as part of efforts to weaken Russia's economy and military [3]. Additionally, he highlighted the importance of protecting women's rights, specifically the right to choose as affirmed in Roe v. Wade [5]. Lastly, he advocated for funding the police with necessary resources and training to ensure community safety [6].\""
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"generation_config = GenerationConfig(\n",
|
||||
" max_used_search_results=7,\n",
|
||||
" response_language=\"eng\",\n",
|
||||
" generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n",
|
||||
" enable_factual_consistency_score=True,\n",
|
||||
")\n",
|
||||
"search_config = SearchConfig(\n",
|
||||
" corpora=[CorpusConfig(corpus_key=corpus_key)],\n",
|
||||
" limit=25,\n",
|
||||
" reranker=ChainReranker(\n",
|
||||
" rerankers=[\n",
|
||||
" CustomerSpecificReranker(reranker_id=\"rnk_272725719\", limit=100),\n",
|
||||
" MmrReranker(diversity_bias=0.2, limit=100),\n",
|
||||
" ]\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" search=search_config,\n",
|
||||
" generation=generation_config,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query_str = \"what did Biden say?\"\n",
|
||||
"\n",
|
||||
"rag = vectara.as_rag(config)\n",
|
||||
"rag.invoke(query_str)[\"answer\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd825d63-93a0-4e45-a455-bfabb01ee1a1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also use the streaming interface like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "27f01330-8917-4eff-b603-59ab2571a4d2",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"President Biden emphasized several key points in his statements. He highlighted the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also discussed measures against Russia, including preventing their central bank from defending the Ruble and targeting Russian oligarchs' assets [3]. Additionally, he reaffirmed the commitment to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5]. Lastly, he advocated for funding the police to ensure community safety [6]."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = {}\n",
|
||||
"curr_key = None\n",
|
||||
"for chunk in rag.stream(query_str):\n",
|
||||
" for key in chunk:\n",
|
||||
" if key not in output:\n",
|
||||
" output[key] = chunk[key]\n",
|
||||
" else:\n",
|
||||
" output[key] += chunk[key]\n",
|
||||
" if key == \"answer\":\n",
|
||||
" print(chunk[key], end=\"\", flush=True)\n",
|
||||
" curr_key = key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f16bf8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more details about Vectara as VectorStore [go to this notebook](../vectorstores/vectara.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d49a91d2-9c53-48cb-8065-a3ba1292e8d0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Vectara Chat\n",
|
||||
"\n",
|
||||
"In most uses of LangChain to create chatbots, one must integrate a special `memory` component that maintains the history of chat sessions and then uses that history to ensure the chatbot is aware of conversation history.\n",
|
||||
"\n",
|
||||
"With Vectara Chat - all of that is performed in the backend by Vectara automatically."
|
||||
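"\n",
"For example, a hypothetical follow-up question (using the `bot` object created in the next cell) can refer back to the previous turn, and Vectara resolves it from the chat history it stores server-side:\n",
"\n",
"```python\n",
"# First turn (the next cell shows the full setup of `bot`)\n",
"bot.invoke(\"What did the president say about Ketanji Brown Jackson?\")[\"answer\"]\n",
"\n",
"# Hypothetical second turn: \"she\" is resolved from the server-side chat history\n",
"bot.invoke(\"Which court does she currently serve on?\")[\"answer\"]\n",
"```"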
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f57264ec-e8b5-4d55-9c16-54898d506f73",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president stated that nominating someone to serve on the United States Supreme Court is one of the most serious constitutional responsibilities he has. He nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, describing her as one of the nation’s top legal minds who will continue Justice Breyer’s legacy of excellence [1].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"generation_config = GenerationConfig(\n",
|
||||
" max_used_search_results=7,\n",
|
||||
" response_language=\"eng\",\n",
|
||||
" generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n",
|
||||
" enable_factual_consistency_score=True,\n",
|
||||
")\n",
|
||||
"search_config = SearchConfig(\n",
|
||||
" corpora=[CorpusConfig(corpus_key=corpus_key, limit=25)],\n",
|
||||
" reranker=MmrReranker(diversity_bias=0.2),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" search=search_config,\n",
|
||||
" generation=generation_config,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"bot = vectara.as_chat(config)\n",
|
||||
"\n",
|
||||
"bot.invoke(\"What did the president say about Ketanji Brown Jackson?\")[\"answer\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13714687-672d-47af-997a-61bb9dd66923",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more details about Vectara chat [go to this notebook](../chat/vectara.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "baf687dc-08c4-49af-98aa-0359e2591f2e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Vectara as self-querying retriever\n",
|
||||
"Vectara offers Intelligent Query Rewriting option which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more details [go to this notebook](../retrievers/self_query/vectara_self_query.ipynb)."
|
||||
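"\n",
"As a minimal sketch (reusing the `vectara` object, `corpus_key`, and the config classes imported above, plus the `vectara_query` method demonstrated in the linked notebook), intelligent query rewriting is switched on through `VectaraQueryConfig`:\n",
"\n",
"```python\n",
"config = VectaraQueryConfig(\n",
"    search=SearchConfig(corpora=[CorpusConfig(corpus_key=corpus_key)]),\n",
"    generation=None,\n",
"    intelligent_query_rewriting=True,\n",
")\n",
"\n",
"vectara.vectara_query(\"what did Biden say about the police?\", config)\n",
"```"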
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8060a423-b291-4166-8fd7-ba0e01692b51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,181 +0,0 @@
|
||||
# Vectara
|
||||
|
||||
>[Vectara](https://vectara.com/) provides a Trusted Generative AI platform, allowing organizations to rapidly create a ChatGPT-like experience (an AI assistant)
|
||||
> which is grounded in the data, documents, and knowledge that they have (technically, it is Retrieval-Augmented-Generation-as-a-service).
|
||||
|
||||
**Vectara Overview:**
|
||||
[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.
|
||||
Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:
|
||||
1. A way to extract text from files (PDF, PPT, DOCX, etc)
|
||||
2. ML-based chunking that provides state of the art performance.
|
||||
3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.
|
||||
4. Its own internal vector database where text chunks and embedding vectors are stored.
|
||||
5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking).
|
||||
6. An LLM for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.
|
||||
|
||||
For more information:
|
||||
- [Documentation](https://docs.vectara.com/docs/)
|
||||
- [API Playground](https://docs.vectara.com/docs/rest-api/)
|
||||
- [Quickstart](https://docs.vectara.com/docs/quickstart)
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
To use `Vectara` with LangChain no special installation steps are required.
|
||||
To get started, [sign up](https://vectara.com/integrations/langchain) for a free Vectara trial,
|
||||
and follow the [quickstart](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key.
|
||||
Once you have these, you can provide them as arguments to the Vectara `vectorstore`, or you can set them as environment variables.
|
||||
|
||||
- export `VECTARA_CUSTOMER_ID`="your_customer_id"
|
||||
- export `VECTARA_CORPUS_ID`="your_corpus_id"
|
||||
- export `VECTARA_API_KEY`="your-vectara-api-key"
|
||||
|
||||
## Vectara as a Vector Store
|
||||
|
||||
There exists a wrapper around the Vectara platform, allowing you to use it as a `vectorstore` in LangChain:
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain_community.vectorstores import Vectara
|
||||
```
|
||||
|
||||
To create an instance of the Vectara vectorstore:
|
||||
```python
|
||||
vectara = Vectara(
|
||||
vectara_customer_id=customer_id,
|
||||
vectara_corpus_id=corpus_id,
|
||||
vectara_api_key=api_key
|
||||
)
|
||||
```
|
||||
The `customer_id`, `corpus_id` and `api_key` are optional, and if they are not supplied will be read from
|
||||
the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively.
|
||||
|
||||
### Adding Texts or Files
|
||||
|
||||
After you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example:
|
||||
|
||||
```python
|
||||
vectara.add_texts(["to be or not to be", "that is the question"])
|
||||
```
|
||||
|
||||
Since Vectara supports file-upload in the platform, we also added the ability to upload files (PDF, TXT, HTML, PPT, DOC, etc) directly.
|
||||
When using this method, each file is uploaded directly to the Vectara backend, processed and chunked optimally there, so you don't have to use the LangChain document loader or chunking mechanism.
|
||||
|
||||
As an example:
|
||||
|
||||
```python
|
||||
vectara.add_files(["path/to/file1.pdf", "path/to/file2.pdf",...])
|
||||
```
|
||||
|
||||
Of course you do not have to add any data, and instead just connect to an existing Vectara corpus where data may already be indexed.
|
||||
|
||||
### Querying the VectorStore
|
||||
|
||||
To query the Vectara vectorstore, you can use the `similarity_search` method (or `similarity_search_with_score`), which takes a query string and returns a list of results:
|
||||
```python
|
||||
results = vectara.similarity_search_with_score("what is LangChain?")
|
||||
```
|
||||
The results are returned as a list of relevant documents, and a relevance score of each document.
|
||||
|
||||
In this case, we used the default retrieval parameters, but you can also specify the following additional arguments in `similarity_search` or `similarity_search_with_score`:
|
||||
- `k`: number of results to return (defaults to 5)
|
||||
- `lambda_val`: the [lexical matching](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) factor for hybrid search (defaults to 0.025)
|
||||
- `filter`: a [filter](https://docs.vectara.com/docs/common-use-cases/filtering-by-metadata/filter-overview) to apply to the results (default None)
|
||||
- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 2.
|
||||
- `rerank_config`: can be used to specify a reranker for the results
|
||||
- `reranker`: mmr, rerank_multilingual_v1 or none. Note that "rerank_multilingual_v1" is a Scale only feature
|
||||
- `rerank_k`: number of results to use for reranking
|
||||
- `mmr_diversity_bias`: 0 = no diversity, 1 = full diversity. This is the lambda parameter in the MMR formula and is in the range 0...1
|
||||
|
||||
To get results without the relevance score, you can simply use the 'similarity_search' method:
|
||||
```python
|
||||
results = vectara.similarity_search("what is LangChain?")
|
||||
```
|
||||
|
||||
## Vectara for Retrieval Augmented Generation (RAG)
|
||||
|
||||
Vectara provides a full RAG pipeline, including generative summarization. To use it as a complete RAG solution, you can use the `as_rag` method.
|
||||
There are a few additional parameters that can be specified in the `VectaraQueryConfig` object to control retrieval and summarization:
|
||||
* k: number of results to return
|
||||
* lambda_val: the lexical matching factor for hybrid search
|
||||
* summary_config (optional): can be used to request an LLM summary in RAG
|
||||
- is_enabled: True or False
|
||||
- max_results: number of results to use for summary generation
|
||||
- response_lang: language of the response summary, in ISO 639-2 format (e.g. 'en', 'fr', 'de', etc)
|
||||
* rerank_config (optional): can be used to specify Vectara Reranker of the results
|
||||
- reranker: mmr, rerank_multilingual_v1 or none
|
||||
- rerank_k: number of results to use for reranking
|
||||
- mmr_diversity_bias: 0 = no diversity, 1 = full diversity.
|
||||
This is the lambda parameter in the MMR formula and is in the range 0...1
|
||||
|
||||
For example:
|
||||
|
||||
```python
|
||||
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng')
|
||||
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
|
||||
config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config)
|
||||
```
|
||||
Then you can use the `as_rag` method to create a RAG pipeline:
|
||||
|
||||
```python
|
||||
query_str = "what did Biden say?"
|
||||
|
||||
rag = vectara.as_rag(config)
|
||||
rag.invoke(query_str)['answer']
|
||||
```
|
||||
|
||||
The `as_rag` method returns a `VectaraRAG` object, which behaves just like any LangChain Runnable, including the `invoke` or `stream` methods.
|
||||
|
||||
## Vectara Chat
|
||||
|
||||
The RAG functionality can be used to create a chatbot. For example, you can create a simple chatbot that responds to user input:
|
||||
|
||||
```python
|
||||
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng')
|
||||
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
|
||||
config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config)
|
||||
|
||||
query_str = "what did Biden say?"
|
||||
bot = vectara.as_chat(config)
|
||||
bot.invoke(query_str)['answer']
|
||||
```
|
||||
|
||||
The main difference is the following: with `as_chat` Vectara internally tracks the chat history and conditions each response on the full chat history.
|
||||
There is no need to keep that history locally to LangChain, as Vectara will manage it internally.
|
||||
|
||||
## Vectara as a LangChain retriever only
|
||||
|
||||
If you want to use Vectara as a retriever only, you can use the `as_retriever` method, which returns a `VectaraRetriever` object.
|
||||
```python
|
||||
retriever = vectara.as_retriever(config=config)
|
||||
retriever.invoke(query_str)
|
||||
```
|
||||
|
||||
Like with as_rag, you provide a `VectaraQueryConfig` object to control the retrieval parameters.
|
||||
In most cases you would not enable the summary_config, but it is left as an option for backwards compatibility.
|
||||
If no summary is requested, the response will be a list of relevant documents, each with a relevance score.
|
||||
If a summary is requested, the response will be a list of relevant documents as before, plus an additional document that includes the generative summary.
|
||||
|
||||
## Hallucination Detection score
|
||||
|
||||
Vectara created [HHEM](https://huggingface.co/vectara/hallucination_evaluation_model) - an open source model that can be used to evaluate RAG responses for factual consistency.
|
||||
As part of the Vectara RAG, the "Factual Consistency Score" (or FCS), which is an improved version of the open source HHEM, is made available via the API.
|
||||
This is automatically included in the output of the RAG pipeline.
|
||||
|
||||
```python
|
||||
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng')
|
||||
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
|
||||
config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config)
|
||||
|
||||
rag = vectara.as_rag(config)
|
||||
resp = rag.invoke(query_str)
|
||||
print(resp['answer'])
|
||||
print(f"Vectara FCS = {resp['fcs']}")
|
||||
```
|
||||
|
||||
## Example Notebooks
|
||||
|
||||
For more detailed examples of using Vectara with LangChain, see the following example notebooks:
|
||||
* [this notebook](/docs/integrations/vectorstores/vectara) shows how to use Vectara: with full RAG or just as a retriever.
|
||||
* [this notebook](/docs/integrations/retrievers/self_query/vectara_self_query) shows the self-query capability with Vectara.
|
||||
* [this notebook](/docs/integrations/providers/vectara/vectara_chat) shows how to build a chatbot with LangChain and Vectara.
|
||||
|
@ -8,7 +8,6 @@
|
||||
"# Vectara self-querying \n",
|
||||
"\n",
|
||||
"[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n",
|
||||
"\n",
|
||||
"Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n",
|
||||
"1. A way to extract text from files (PDF, PPT, DOCX, etc)\n",
|
||||
"2. ML-based chunking that provides state of the art performance.\n",
|
||||
@ -17,9 +16,27 @@
|
||||
"5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n",
|
||||
"6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n",
|
||||
"\n",
|
||||
"See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n",
|
||||
"For more information:\n",
|
||||
"- [Documentation](https://docs.vectara.com/docs/)\n",
|
||||
"- [API Playground](https://docs.vectara.com/docs/rest-api/)\n",
|
||||
"- [Quickstart](https://docs.vectara.com/docs/quickstart)\n",
|
||||
"\n",
|
||||
"This notebook shows how to use `SelfQueryRetriever` with Vectara."
|
||||
"\n",
|
||||
"This notebook shows how to use `Vectara` as `SelfQueryRetriever`.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `VectaraVectorStore` you first need to install the partner package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "07f3f1a4-f552-4d07-ba48-18fb5d8641c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!uv pip install -U pip && uv pip install -qU langchain-vectara"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -30,14 +47,14 @@
|
||||
"# Getting Started\n",
|
||||
"\n",
|
||||
"To get started, use the following steps:\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n",
|
||||
"2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n",
|
||||
"3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n",
|
||||
"\n",
|
||||
"To use LangChain with Vectara, you'll need to have these three values: `customer ID`, `corpus ID` and `api_key`.\n",
|
||||
"You can provide those to LangChain in two ways:\n",
|
||||
"To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n",
|
||||
"You can provide `VECTARA_API_KEY` to LangChain in two ways:\n",
|
||||
"\n",
|
||||
"1. Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n",
|
||||
"1. Include in your environment these two variables: `VECTARA_API_KEY`.\n",
|
||||
"\n",
|
||||
" For example, you can set these variables using os.environ and getpass as follows:\n",
|
||||
"\n",
|
||||
@ -45,8 +62,6 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
@ -54,14 +69,11 @@
|
||||
"\n",
|
||||
"```python\n",
|
||||
"vectara = Vectara(\n",
|
||||
" vectara_customer_id=vectara_customer_id,\n",
|
||||
" vectara_corpus_id=vectara_corpus_id,\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
" )\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"In this notebook we assume they are provided in the environment.\n",
|
||||
"\n",
|
||||
"**Notes:** The self-query retriever requires you to have `lark` installed (`pip install lark`). "
|
||||
"In this notebook we assume they are provided in the environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -71,14 +83,14 @@
|
||||
"source": [
|
||||
"## Connecting to Vectara from LangChain\n",
|
||||
"\n",
|
||||
"In this example, we assume that you've created an account and a corpus, and added your `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY` (created with permissions for both indexing and query) as environment variables.\n",
|
||||
"In this example, we assume that you've created an account and a corpus, and added your `VECTARA_CORPUS_KEY` and `VECTARA_API_KEY` (created with permissions for both indexing and query) as environment variables.\n",
|
||||
"\n",
|
||||
"We further assume the corpus has 4 fields defined as filterable metadata attributes: `year`, `director`, `rating`, and `genre`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "9d3aa44f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -87,14 +99,10 @@
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<YOUR_VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = \"<YOUR_VECTARA_CORPUS_ID>\"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = \"<YOUR_VECTARA_CUSTOMER_ID>\"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"VECTARA_API_KEY\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_KEY\"] = \"VECTARA_CORPUS_KEY\"\n",
|
||||
"\n",
|
||||
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
"from langchain_community.vectorstores import Vectara\n",
|
||||
"from langchain_openai.chat_models import ChatOpenAI"
|
||||
"from langchain_vectara import Vectara"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -109,7 +117,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"id": "bcbe04d9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -148,9 +156,12 @@
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n",
|
||||
"vectara = Vectara()\n",
|
||||
"for doc in docs:\n",
|
||||
" vectara.add_texts([doc.page_content], doc_metadata=doc.metadata)"
|
||||
" vectara.add_texts(\n",
|
||||
" [doc.page_content], corpus_key=corpus_key, doc_metadata=doc.metadata\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -158,45 +169,32 @@
|
||||
"id": "5ecaab6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating the self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents.\n",
|
||||
"## Self-query with Vectara\n",
|
||||
" You don't need self-query via the LangChain mechanism—enabling `intelligent_query_rewriting` on the Vectara platform achieves the same result.\n",
|
||||
"Vectara offers Intelligent Query Rewriting option which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more [details](https://docs.vectara.com/docs/search-and-retrieval/intelligent-query-rewriting).\n",
|
||||
"\n",
|
||||
"We then provide an llm (in this case OpenAI) and the `vectara` vectorstore as arguments:"
|
||||
"Enable intelligent query rewriting on a per-query basis by setting the `intelligent_query_rewriting` parameter to `true` in `VectaraQueryConfig`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "86e34dbf",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metadata_field_info = [\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"genre\",\n",
|
||||
" description=\"The genre of the movie\",\n",
|
||||
" type=\"string or list[string]\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"year\",\n",
|
||||
" description=\"The year the movie was released\",\n",
|
||||
" type=\"integer\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"director\",\n",
|
||||
" description=\"The name of the movie director\",\n",
|
||||
" type=\"string\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"document_content_description = \"Brief summary of a movie\"\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4o\", max_tokens=4069)\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, vectara, document_content_description, metadata_field_info, verbose=True\n",
|
||||
"from langchain_vectara.vectorstores import (\n",
|
||||
" CorpusConfig,\n",
|
||||
" SearchConfig,\n",
|
||||
" VectaraQueryConfig,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" search=SearchConfig(corpora=[CorpusConfig(corpus_key=corpus_key)]),\n",
|
||||
" generation=None,\n",
|
||||
" intelligent_query_rewriting=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@ -205,116 +203,31 @@
|
||||
"id": "ea9df8d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Self-retrieval Queries\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
"## Queries\n",
|
||||
"And now we can try actually using our vectara_queries method!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 8,
|
||||
"id": "38a126e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'lang': 'eng', 'offset': '0', 'len': '66', 'year': '1993', 'rating': '7.7', 'genre': 'science fiction', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'lang': 'eng', 'offset': '0', 'len': '41', 'year': '1995', 'genre': 'animated', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'lang': 'eng', 'offset': '0', 'len': '82', 'year': '2019', 'director': 'Greta Gerwig', 'rating': '8.3', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'lang': 'eng', 'offset': '0', 'len': '76', 'year': '2010', 'director': 'Christopher Nolan', 'rating': '8.2', 'source': 'langchain'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"What are movies about scientists\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "fc3f1e6e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a filter\n",
|
||||
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b19d4da0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'lang': 'eng', 'offset': '0', 'len': '82', 'year': '2019', 'director': 'Greta Gerwig', 'rating': '8.3', 'source': 'langchain'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.invoke(\"Has Greta Gerwig directed any movies about women\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f900e40e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.invoke(\"What's a highly rated (above 8.5) science fiction film?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "12a51522",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'lang': 'eng', 'offset': '0', 'len': '41', 'year': '1995', 'genre': 'animated', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'lang': 'eng', 'offset': '0', 'len': '66', 'year': '1993', 'rating': '7.7', 'genre': 'science fiction', 'source': 'langchain'})]"
|
||||
"[(Document(metadata={'year': 1995, 'genre': 'animated', 'source': 'langchain'}, page_content='Toys come alive and have a blast doing so'),\n",
|
||||
" 0.4141285717487335),\n",
|
||||
" (Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" 0.4046250879764557),\n",
|
||||
" (Document(metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2, 'source': 'langchain'}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),\n",
|
||||
" 0.227469339966774),\n",
|
||||
" (Document(metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3, 'source': 'langchain'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them'),\n",
|
||||
" 0.19208428263664246),\n",
|
||||
" (Document(metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction', 'source': 'langchain'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" 0.1902722418308258),\n",
|
||||
" (Document(metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6, 'source': 'langchain'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
|
||||
" 0.08151976019144058)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@ -323,74 +236,107 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"retriever.invoke(\n",
|
||||
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
"# This example only specifies a relevant query\n",
|
||||
"vectara.vectara_query(\"What are movies about scientists\", config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bff36b88-b506-4877-9c63-e5a1a8d78e64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectara,\n",
|
||||
" document_content_description,\n",
|
||||
" metadata_field_info,\n",
|
||||
" enable_limit=True,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00e8baad-a9d7-4498-bd8d-ca41d0691386",
|
||||
"id": "fc3f1e6e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is cool, we can include the number of results we would like to see in the query and the self retriever would correctly understand it. For example, let's look for "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2758d229-4f97-499c-819f-888acaf8ee10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]"
|
||||
"[(Document(metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6, 'source': 'langchain'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
|
||||
" 0.34279149770736694),\n",
|
||||
" (Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" 0.242923304438591)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"what are two movies with a rating above 8.5\")"
|
||||
"# This example only specifies a filter\n",
|
||||
"vectara.vectara_query(\"I want to watch a movie rated higher than 8.5\", config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "b19d4da0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3, 'source': 'langchain'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them'),\n",
|
||||
" 0.10141132771968842)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"vectara.vectara_query(\"Has Greta Gerwig directed any movies about women\", config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "f900e40e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" 0.9508692026138306)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"vectara.vectara_query(\"What's a highly rated (above 8.5) science fiction film?\", config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "12a51522",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(metadata={'year': 1995, 'genre': 'animated', 'source': 'langchain'}, page_content='Toys come alive and have a blast doing so'),\n",
|
||||
" 0.7290377616882324),\n",
|
||||
" (Document(metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction', 'source': 'langchain'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" 0.4838160574436188)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"vectara.vectara_query(\n",
|
||||
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\",\n",
|
||||
" config,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -418,7 +364,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
215
docs/docs/integrations/tools/memgraph.ipynb
Normal file
@ -0,0 +1,215 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Memgraph\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MemgraphToolkit\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"This will help you getting started with the Memgraph [toolkit](/docs/concepts/tools/#toolkits). \n",
|
||||
"\n",
|
||||
"Tools within `MemgraphToolkit` are designed for the interaction with the `Memgraph` database.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To be able tot follow the steps below, make sure you have a running Memgraph instance on your local host. For more details on how to run Memgraph, take a look at [Memgraph docs](https://memgraph.com/docs/getting-started)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"This toolkit lives in the `langchain-memgraph` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-memgraph "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langchain_memgraph import MemgraphToolkit\n",
|
||||
"from langchain_memgraph.graphs.memgraph import Memgraph\n",
|
||||
"\n",
|
||||
"db = Memgraph(url=url, username=username, password=password)\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\")\n",
|
||||
"\n",
|
||||
"toolkit = MemgraphToolkit(\n",
|
||||
" db=db, # Memgraph instance\n",
|
||||
" llm=llm, # LLM chat model for LLM operations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c5f2839-4020-424e-9fc9-07777eede442",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"View available tools:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51a60dbe-9f2e-4e04-bb62-23968f17164a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "608af19d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"Tools can be individually called by passing an arguments, for QueryMemgraphTool it would be: \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ffa944db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_memgraph.tools import QueryMemgraphTool\n",
|
||||
"\n",
|
||||
"# Rest of the code omitted for brevity\n",
|
||||
"\n",
|
||||
"tool.invoke({QueryMemgraphTool({\"query\": \"MATCH (n) RETURN n LIMIT 5\"})})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "310bf18e-6c9a-4072-b86e-47bc1fcca29d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"example_query = \"MATCH (n) RETURN n LIMIT 1\"\n",
|
||||
"\n",
|
||||
"events = agent_executor.stream(\n",
|
||||
" {\"messages\": [(\"user\", example_query)]},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
")\n",
|
||||
"for event in events:\n",
|
||||
" event[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29ca615b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For more details on API visit [Memgraph integration docs](https://memgraph.com/docs/ai-ecosystem/integrations#langchain)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -20,7 +20,7 @@
|
||||
"### Integration details\n",
|
||||
"| Class | Package | Serializable | [JS support](https://js.langchain.com/docs/integrations/tools/tavily_search) | Package latest |\n",
|
||||
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
|
||||
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ❌ | ❌ |  |\n",
|
||||
"| [TavilySearch](https://github.com/tavily-ai/langchain-tavily) | [langchain-tavily](https://pypi.org/project/langchain-tavily/) | ✅ | ❌ |  |\n",
|
||||
"\n",
|
||||
"### Tool features\n",
|
||||
"| [Returns artifact](/docs/how_to/tool_artifacts/) | Native async | Return data | Pricing |\n",
|
||||
@ -73,26 +73,36 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to instantiate an instance of the Tavily search tools, with"
|
||||
]
|
||||
"Here we show how to instantiate an instance of the Tavily search tool. The tool accepts various parameters to customize the search. After instantiation we invoke the tool with a simple query. This tool allows you to complete search queries using Tavily's Search API endpoint.\n",
|
||||
"\n",
|
||||
"Instantiation\n",
|
||||
"The tool accepts various parameters during instantiation:\n",
|
||||
"\n",
|
||||
"- max_results (optional, int): Maximum number of search results to return. Default is 5.\n",
|
||||
"- topic (optional, str): Category of the search. Can be \"general\", \"news\", or \"finance\". Default is \"general\".\n",
|
||||
"- include_answer (optional, bool): Include an answer to original query in results. Default is False.\n",
|
||||
"- include_raw_content (optional, bool): Include cleaned and parsed HTML of each search result. Default is False.\n",
|
||||
"- include_images (optional, bool): Include a list of query related images in the response. Default is False.\n",
|
||||
"- include_image_descriptions (optional, bool): Include descriptive text for each image. Default is False.\n",
|
||||
"- search_depth (optional, str): Depth of the search, either \"basic\" or \"advanced\". Default is \"basic\".\n",
|
||||
"- time_range (optional, str): The time range back from the current date to filter results - \"day\", \"week\", \"month\", or \"year\". Default is None.\n",
|
||||
"- include_domains (optional, List[str]): List of domains to specifically include. Default is None.\n",
|
||||
"- exclude_domains (optional, List[str]): List of domains to specifically exclude. Default is None.\n",
|
||||
"\n",
|
||||
"For a comprehensive overview of the available parameters, refer to the [Tavily Search API documentation](https://docs.tavily.com/documentation/api-reference/endpoint/search)"
|
||||
],
|
||||
"id": "72461be913bfaf2b"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-03-19T16:44:04.570451Z",
|
||||
"start_time": "2025-03-19T16:44:04.561713Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"from langchain_tavily import TavilySearch\n",
|
||||
"\n",
|
||||
@ -108,12 +118,12 @@
|
||||
" # include_domains=None,\n",
|
||||
" # exclude_domains=None\n",
|
||||
")"
|
||||
]
|
||||
],
|
||||
"id": "dc382e5426394836"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74147a1a",
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
@ -121,62 +131,21 @@
|
||||
"\n",
|
||||
"The Tavily search tool accepts the following arguments during invocation:\n",
|
||||
"- `query` (required): A natural language search query\n",
|
||||
"- The following arguments can also be set during invokation : `include_images`, `search_depth` , `time_range`, `include_domains`, `exclude_domains`, `include_images`\n",
|
||||
"- The following arguments can also be set during invocation : `include_images`, `search_depth` , `time_range`, `include_domains`, `exclude_domains`, `include_images`\n",
|
||||
"- For reliability and performance reasons, certain parameters that affect response size cannot be modified during invocation: `include_answer` and `include_raw_content`. These limitations prevent unexpected context window issues and ensure consistent results.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"NOTE: The optional arguments are available for agents to dynamically set, if you set a argument during instantiation and then invoke the tool with a different value, the tool will use the value you passed during invokation."
|
||||
]
|
||||
"NOTE: The optional arguments are available for agents to dynamically set, if you set an argument during instantiation and then invoke the tool with a different value, the tool will use the value you passed during invocation."
|
||||
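"\n",
"As an illustrative sketch (reusing the `tool` instance created above), the same query can be re-run with a deeper search and a narrower time window by passing these arguments at invocation time:\n",
"\n",
"```python\n",
"tool.invoke(\n",
"    {\n",
"        \"query\": \"What happened at the last wimbledon\",\n",
"        \"search_depth\": \"advanced\",  # overrides the value set at instantiation\n",
"        \"time_range\": \"year\",\n",
"    }\n",
")\n",
"```"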
],
|
||||
"id": "f997d2733b63f655"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What happened at the last wimbledon',\n",
|
||||
" 'follow_up_questions': None,\n",
|
||||
" 'answer': None,\n",
|
||||
" 'images': [],\n",
|
||||
" 'results': [{'title': \"Andy Murray pulls out of the men's singles draw at his last Wimbledon\",\n",
|
||||
" 'url': 'https://www.nbcnews.com/news/sports/andy-murray-wimbledon-tennis-singles-draw-rcna159912',\n",
|
||||
" 'content': \"NBC News Now LONDON — Andy Murray, one of the last decade's most successful male tennis players, has pulled out of the singles tournament at what is almost certain to be his last Wimbledon, his team confirmed Tuesday. Murray, 37, who has won the Wimbledon singles title twice and the U.S Open once, has been battling to be fit to play at the All England Club for weeks. “Unfortunately, despite working incredibly hard on his recovery since his operation just over a week ago, Andy has taken the very difficult decision not to play the singles this year,” his team said in a statement reported by Sky News. The news caps a glittering career on the ATP singles tour, which placed Murray at No. 1 in the world for 41 weeks.\",\n",
|
||||
" 'score': 0.67527276,\n",
|
||||
" 'raw_content': None},\n",
|
||||
" {'title': 'Andy Murray brought to tears by emotional ceremony as Wimbledon ...',\n",
|
||||
" 'url': 'https://edition.cnn.com/2024/07/05/sport/andy-murray-wimbledon-farewell-ceremony-spt-intl/index.html',\n",
|
||||
" 'content': 'Andy Murray brought to tears by emotional ceremony as Wimbledon farewell begins with doubles defeat | CNN Football Tennis Golf Motorsport US Sports Olympics Climbing Esports Hockey CNN10 About CNN Andy Murray became emotional when speaking on court following his Wimbledon defeat on Thursday. It was an emotional night for three-time grand slam champion Andy Murray on Thursday, as the 37-year-old’s Wimbledon farewell began with doubles defeat. Following a doubles defeat alongside his brother Jamie on Thursday, Murray was moved to tears after a short ceremony on Centre Court in which a montage of his career played out on big screens. Murray watches on as a video montage of his career highlights plays on the big screens at Wimbledon. CNN10 About CNN',\n",
|
||||
" 'score': 0.43482184,\n",
|
||||
" 'raw_content': None},\n",
|
||||
" {'title': 'Wimbledon - Latest News, Headlines and Entertainment from the BBC',\n",
|
||||
" 'url': 'https://www.bbc.co.uk/news/topics/c1kr68g26j9t',\n",
|
||||
" 'content': \"Wimbledon - Latest News, Headlines and Entertainment from the BBC BBC Homepage Search BBC Close menu BBC News BBC Verify World News TV Weather for Wimbledon London London Disabled people share experience of accessible homes London Man's pop-up urinal death may never be explained, family fears London London London London London London London Met PC jailed for assaulting man in hospital bed London London London Man jumped to his death in police station - inquest London Central London YMCA closes after failed injunction Kerr denies 'whiteness as insult' against police Man denies being getaway driver in £1m watch raid About the BBC Contact the BBC BBC emails for you The BBC is not responsible for the content of external sites.\",\n",
|
||||
" 'score': 0.3916624,\n",
|
||||
" 'raw_content': None},\n",
|
||||
" {'title': 'Wimbledon - latest news, breaking stories and comment - The Independent',\n",
|
||||
" 'url': 'https://www.independent.co.uk/topic/wimbledon',\n",
|
||||
" 'content': \"Independent Australian Open champion Jannik Sinner's style draws comparisons to Novak Djokovic Patten wins second grand slam doubles title after Australian Open epic Australian Open: Madison Keys can win her first Slam title and stop Aryna Sabalenka's threepeat Novak Djokovic hits back to beat Carlos Alcaraz in Australian Open thriller Australian Open 2025: Carlos Alcaraz and Jannik Sinner have a real rivalry atop men's tennis Australian Open 2025: Carlos Alcaraz and Jannik Sinner have a real rivalry atop men's tennis Australian Open 2025: Cases involving Jannik Sinner and Iga Swiatek make doping a top topic Australian Open 2025: There really isn't much time off in the offseason for tennis players Jd Sports Discount Code\",\n",
|
||||
" 'score': 0.3539422,\n",
|
||||
" 'raw_content': None},\n",
|
||||
" {'title': 'Novak Djokovic loses to Carlos Alcaraz Wimbledon final',\n",
|
||||
" 'url': 'https://www.wimbledon.com/en_GB/news/articles/2023-07-16/alcaraz_ends_the_djokovic_run.html',\n",
|
||||
" 'content': 'Password* By joining myWimbledon you are confirming you are happy to receive news and information from The All England Lawn Tennis Club regarding The Club, The Championships and The Grounds via email By joining myWimbledon you are confirming you are happy to receive news and information from The All England Lawn Tennis Club regarding The Club, The Championships and The Grounds via email Please enter your email address to update your password: We have sent details on how to update your password to the email address you provided. A verification email with a link to verify your account has been sent to you. Please enter the code sent to your email address below and click SUBMIT to complete the verification.',\n",
|
||||
" 'score': 0.23453853,\n",
|
||||
" 'raw_content': None}],\n",
|
||||
" 'response_time': 1.43}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.invoke({\"query\": \"What happened at the last wimbledon\"})"
|
||||
]
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "tool.invoke({\"query\": \"What happened at the last wimbledon\"})",
|
||||
"id": "5e75399230ab9fc1"
|
||||
},
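{
"cell_type": "markdown",
"id": "tavily-invocation-override-note",
"metadata": {},
"source": [
"As a minimal sketch of setting the optional arguments at invocation time (the specific values here, such as `search_depth=\"advanced\"` and the example domains, are illustrative assumptions), you can pass them alongside the query:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "tavily-invocation-override-example",
"metadata": {},
"outputs": [],
"source": [
"# Override optional arguments for this call only; the values are illustrative.\n",
"tool.invoke(\n",
"    {\n",
"        \"query\": \"What happened at the last wimbledon\",\n",
"        \"search_depth\": \"advanced\",\n",
"        \"include_domains\": [\"bbc.co.uk\", \"nbcnews.com\"],\n",
"    }\n",
")"
]
},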
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@ -287,21 +256,21 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"================================\u001B[1m Human Message \u001B[0m=================================\n",
|
||||
"\n",
|
||||
"What nation hosted the Euro 2024? Include only wikipedia sources.\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
||||
"Tool Calls:\n",
|
||||
" tavily_search (call_yxmR4K2uadsQ8LKoyi8JyoLD)\n",
|
||||
" Call ID: call_yxmR4K2uadsQ8LKoyi8JyoLD\n",
|
||||
" Args:\n",
|
||||
" query: Euro 2024 host nation\n",
|
||||
" include_domains: ['wikipedia.org']\n",
|
||||
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
||||
"=================================\u001B[1m Tool Message \u001B[0m=================================\n",
|
||||
"Name: tavily_search\n",
|
||||
"\n",
|
||||
"{\"query\": \"Euro 2024 host nation\", \"follow_up_questions\": null, \"answer\": null, \"images\": [], \"results\": [{\"title\": \"UEFA Euro 2024 - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/UEFA_Euro_2024\", \"content\": \"Tournament details Host country Germany Dates 14 June – 14 July Teams 24 Venue(s) 10 (in 10 host cities) Final positions Champions Spain (4th title) Runners-up England Tournament statistics Matches played 51 Goals scored 117 (2.29 per match) Attendance 2,681,288 (52,574 per match) Top scorer(s) Harry Kane Georges Mikautadze Jamal Musiala Cody Gakpo Ivan Schranz Dani Olmo (3 goals each) Best player(s) Rodri Best young player Lamine Yamal ← 2020 2028 → The 2024 UEFA European Football Championship, commonly referred to as UEFA Euro 2024 (stylised as UEFA EURO 2024) or simply Euro 2024, was the 17th UEFA European Championship, the quadrennial international football championship organised by UEFA for the European men's national teams of their member associations. Germany hosted the tournament, which took place from 14 June to 14 July 2024. The tournament involved 24 teams, with Georgia making their European Championship debut. [4] Host nation Germany were eliminated by Spain in the quarter-finals; Spain went on to win the tournament for a record fourth time after defeating England 2–1 in the final.\", \"score\": 0.9104262, \"raw_content\": null}, {\"title\": \"UEFA Euro 2024 - Simple English Wikipedia, the free encyclopedia\", \"url\": \"https://simple.wikipedia.org/wiki/UEFA_Euro_2024\", \"content\": \"The 2024 UEFA European Football Championship, also known as UEFA Euro 2024 or simply Euro 2024, was the 17th edition of the UEFA European Championship. Germany was hosting the tournament. ... The UEFA Executive Committee voted for the host in a secret ballot, with only a simple majority (more than half of the valid votes) required to determine\", \"score\": 0.81418616, \"raw_content\": null}, {\"title\": \"Championnat d'Europe de football 2024 — Wikipédia\", \"url\": \"https://fr.wikipedia.org/wiki/Championnat_d'Europe_de_football_2024\", \"content\": \"Le Championnat d'Europe de l'UEFA de football 2024 est la 17 e édition du Championnat d'Europe de football, communément abrégé en Euro 2024, compétition organisée par l'UEFA et rassemblant les meilleures équipes nationales masculines européennes. L'Allemagne est désignée pays organisateur de la compétition le 27 septembre 2018. C'est la troisième fois que des matches du Championnat\", \"score\": 0.8055255, \"raw_content\": null}, {\"title\": \"UEFA Euro 2024 bids - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/UEFA_Euro_2024_bids\", \"content\": \"The bidding process of UEFA Euro 2024 ended on 27 September 2018 in Nyon, Switzerland, when Germany was announced to be the host. [1] Two bids came before the deadline, 3 March 2017, which were Germany and Turkey as single bids. ... 
Press agencies revealed on 24 October 2013, that the European football governing body UEFA would have decided on\", \"score\": 0.7882741, \"raw_content\": null}, {\"title\": \"2024 UEFA European Under-19 Championship - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2024_UEFA_European_Under-19_Championship\", \"content\": \"The 2024 UEFA European Under-19 Championship (also known as UEFA Under-19 Euro 2024) was the 21st edition of the UEFA European Under-19 Championship (71st edition if the Under-18 and Junior eras are included), the annual international youth football championship organised by UEFA for the men's under-19 national teams of Europe. Northern Ireland hosted the tournament from 15 to 28 July 2024.\", \"score\": 0.7783298, \"raw_content\": null}], \"response_time\": 1.67}\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
||||
"\n",
|
||||
"The nation that hosted Euro 2024 was Germany. You can find more information on the [Wikipedia page for UEFA Euro 2024](https://en.wikipedia.org/wiki/UEFA_Euro_2024).\n"
|
||||
]
|
||||
|
@ -462,6 +462,475 @@
|
||||
"retriever.invoke(\"Stealing from the bank is a crime\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5828dda5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Distance Similarity Algorithm\n",
|
||||
"\n",
|
||||
"Elasticsearch supports the following vector distance similarity algorithms:\n",
|
||||
"\n",
|
||||
"- cosine\n",
|
||||
"- euclidean\n",
|
||||
"- dot_product\n",
|
||||
"\n",
|
||||
"The cosine similarity algorithm is the default.\n",
|
||||
"\n",
|
||||
"You can specify the similarity algorithm you need via the `distance_strategy` parameter.\n",
|
||||
"\n",
|
||||
"**NOTE**: Depending on the retrieval strategy, the similarity algorithm cannot be changed at query time; it has to be set when the index mapping for the field is created. If you need to change the similarity algorithm, you need to delete the index and recreate it with the correct `distance_strategy`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cec8b2ac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test\",\n",
|
||||
" distance_strategy=\"COSINE\",\n",
|
||||
" # distance_strategy=\"EUCLIDEAN_DISTANCE\"\n",
|
||||
" # distance_strategy=\"DOT_PRODUCT\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c9fb8a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retrieval Strategies\n",
|
||||
"\n",
|
||||
"Elasticsearch has big advantages over other vector-only databases thanks to its ability to support a wide range of retrieval strategies. In this notebook we will configure `ElasticsearchStore` to support some of the most common ones.\n",
|
||||
"\n",
|
||||
"By default, `ElasticsearchStore` uses the `DenseVectorStrategy` (was called `ApproxRetrievalStrategy` prior to version 0.2.0).\n",
|
||||
"\n",
|
||||
"### DenseVectorStrategy\n",
|
||||
"\n",
|
||||
"This will return the top k most similar vectors to the query vector. The `k` parameter is set when the `ElasticsearchStore` is initialized. The default value is 10."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4d59a493",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import DenseVectorStrategy\n",
|
||||
"\n",
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test\",\n",
|
||||
" strategy=DenseVectorStrategy(),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = db.similarity_search(\n",
|
||||
" query=\"What did the president say about Ketanji Brown Jackson?\", k=10\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cf5d3d2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Example: Hybrid retrieval with dense vector and keyword search\n",
|
||||
"\n",
|
||||
"This example will show how to configure `ElasticsearchStore` to perform hybrid retrieval, using a combination of approximate semantic search and keyword-based search.\n",
|
||||
"\n",
|
||||
"We use RRF to balance the two scores from different retrieval methods.\n",
|
||||
"\n",
|
||||
"To enable hybrid retrieval, we need to set `hybrid=True` in the `DenseVectorStrategy` constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "109f992a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test\",\n",
|
||||
" strategy=DenseVectorStrategy(hybrid=True),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b6e62ef0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"When hybrid is enabled, the query performed will be a combination of approximate semantic search and keyword-based search.\n",
|
||||
"\n",
|
||||
"It will use RRF (Reciprocal Rank Fusion) to balance the two scores from different retrieval methods.\n",
|
||||
"\n",
|
||||
"**Note**: RRF requires Elasticsearch 8.9.0 or above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c07444e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"{\n",
|
||||
" \"retriever\": {\n",
|
||||
" \"rrf\": {\n",
|
||||
" \"retrievers\": [\n",
|
||||
" {\n",
|
||||
" \"standard\": {\n",
|
||||
" \"query\": {\n",
|
||||
" \"bool\": {\n",
|
||||
" \"filter\": [],\n",
|
||||
" \"must\": [{\"match\": {\"text\": {\"query\": \"foo\"}}}],\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"knn\": {\n",
|
||||
" \"field\": \"vector\",\n",
|
||||
" \"filter\": [],\n",
|
||||
" \"k\": 1,\n",
|
||||
" \"num_candidates\": 50,\n",
|
||||
" \"query_vector\": [1.0, ..., 0.0],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2822fbf7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Example: Dense vector search with Embedding Model in Elasticsearch\n",
|
||||
"\n",
|
||||
"This example will show how to configure `ElasticsearchStore` to use the embedding model deployed in Elasticsearch for dense vector retrieval.\n",
|
||||
"\n",
|
||||
"To use this, specify the model ID in the `DenseVectorStrategy` constructor via the `model_id` argument.\n",
|
||||
"\n",
|
||||
"**NOTE**: This requires the model to be deployed and running in Elasticsearch ML node. See [notebook example](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb) on how to deploy the model with `eland`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d97d9db4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DENSE_SELF_DEPLOYED_INDEX_NAME = \"test-dense-self-deployed\"\n",
|
||||
"\n",
|
||||
"# Note: This does not have an embedding function specified\n",
|
||||
"# Instead, we will use the embedding model deployed in Elasticsearch\n",
|
||||
"db = ElasticsearchStore(\n",
|
||||
" es_cloud_id=\"<your cloud id>\",\n",
|
||||
" es_user=\"elastic\",\n",
|
||||
" es_password=\"<your password>\",\n",
|
||||
" index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n",
|
||||
" query_field=\"text_field\",\n",
|
||||
" vector_query_field=\"vector_query_field.predicted_value\",\n",
|
||||
" strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Set up an ingest pipeline to perform the embedding\n",
|
||||
"# of the text field\n",
|
||||
"db.client.ingest.put_pipeline(\n",
|
||||
" id=\"test_pipeline\",\n",
|
||||
" processors=[\n",
|
||||
" {\n",
|
||||
" \"inference\": {\n",
|
||||
" \"model_id\": \"sentence-transformers__all-minilm-l6-v2\",\n",
|
||||
" \"field_map\": {\"query_field\": \"text_field\"},\n",
|
||||
" \"target_field\": \"vector_query_field\",\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# creating a new index with the pipeline,\n",
|
||||
"# not relying on langchain to create the index\n",
|
||||
"db.client.indices.create(\n",
|
||||
" index=DENSE_SELF_DEPLOYED_INDEX_NAME,\n",
|
||||
" mappings={\n",
|
||||
" \"properties\": {\n",
|
||||
" \"text_field\": {\"type\": \"text\"},\n",
|
||||
" \"vector_query_field\": {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"predicted_value\": {\n",
|
||||
" \"type\": \"dense_vector\",\n",
|
||||
" \"dims\": 384,\n",
|
||||
" \"index\": True,\n",
|
||||
" \"similarity\": \"l2_norm\",\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" settings={\"index\": {\"default_pipeline\": \"test_pipeline\"}},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"db.from_texts(\n",
|
||||
" [\"hello world\"],\n",
|
||||
" es_cloud_id=\"<cloud id>\",\n",
|
||||
" es_user=\"elastic\",\n",
|
||||
" es_password=\"<cloud password>\",\n",
|
||||
" index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n",
|
||||
" query_field=\"text_field\",\n",
|
||||
" vector_query_field=\"vector_query_field.predicted_value\",\n",
|
||||
" strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Perform search\n",
|
||||
"db.similarity_search(\"hello world\", k=10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9651b01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### SparseVectorStrategy (ELSER)\n",
|
||||
"\n",
|
||||
"This strategy uses Elasticsearch's sparse vector retrieval to retrieve the top-k results. We only support our own \"ELSER\" embedding model for now.\n",
|
||||
"\n",
|
||||
"**NOTE**: This requires the ELSER model to be deployed and running on an Elasticsearch ML node.\n",
|
||||
"\n",
|
||||
"To use this, specify `SparseVectorStrategy` (was called `SparseVectorRetrievalStrategy` prior to version 0.2.0) in the `ElasticsearchStore` constructor. You will need to provide a model ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c750ff57",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import SparseVectorStrategy\n",
|
||||
"\n",
|
||||
"# Note that this example doesn't have an embedding function. This is because we infer the tokens at index time and at query time within Elasticsearch.\n",
|
||||
"# This requires the ELSER model to be loaded and running in Elasticsearch.\n",
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" es_cloud_id=\"<cloud id>\",\n",
|
||||
" es_user=\"elastic\",\n",
|
||||
" es_password=\"<cloud password>\",\n",
|
||||
" index_name=\"test-elser\",\n",
|
||||
" strategy=SparseVectorStrategy(model_id=\".elser_model_2\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"db.client.indices.refresh(index=\"test-elser\")\n",
|
||||
"\n",
|
||||
"results = db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\", k=4\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "416e224e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### DenseVectorScriptScoreStrategy\n",
|
||||
"\n",
|
||||
"This strategy uses Elasticsearch's script score query to perform exact vector retrieval (also known as brute force) to retrieve the top-k results. (This strategy was called `ExactRetrievalStrategy` prior to version 0.2.0.)\n",
|
||||
"\n",
|
||||
"To use this, specify `DenseVectorScriptScoreStrategy` in `ElasticsearchStore` constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ced32701",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import DenseVectorScriptScoreStrategy\n",
|
||||
"\n",
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test\",\n",
|
||||
" strategy=DenseVectorScriptScoreStrategy(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92c9cc33",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### BM25Strategy\n",
|
||||
"\n",
|
||||
"Finally, you can use full-text keyword search.\n",
|
||||
"\n",
|
||||
"To use this, specify `BM25Strategy` in `ElasticsearchStore` constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9fd59f69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import BM25Strategy\n",
|
||||
"\n",
|
||||
"db = ElasticsearchStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test\",\n",
|
||||
" strategy=BM25Strategy(),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6556d3c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### BM25RetrievalStrategy\n",
|
||||
"\n",
|
||||
"This strategy allows the user to perform searches using pure BM25 without vector search.\n",
|
||||
"\n",
|
||||
"To use this, specify `BM25RetrievalStrategy` in `ElasticsearchStore` constructor.\n",
|
||||
"\n",
|
||||
"Note that in the example below, the embedding option is not specified, indicating that the search is conducted without using embeddings.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "478af4bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import ElasticsearchStore\n",
|
||||
"\n",
|
||||
"db = ElasticsearchStore(\n",
|
||||
" es_url=\"http://localhost:9200\",\n",
|
||||
" index_name=\"test_index\",\n",
|
||||
" strategy=ElasticsearchStore.BM25RetrievalStrategy(),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"db.add_texts(\n",
|
||||
" [\"foo\", \"foo bar\", \"foo bar baz\", \"bar\", \"bar baz\", \"baz\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"results = db.similarity_search(query=\"foo\", k=10)\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed899034",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customize the Query\n",
|
||||
"\n",
|
||||
"With the `custom_query` parameter at search time, you can adjust the query that is used to retrieve documents from Elasticsearch. This is useful if you want to use a more complex query, for example to support linear boosting of fields; a boosted-field sketch follows the basic example below.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e0ab7c94",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Example of a custom query that just does a BM25 search on the text field.\n",
|
||||
"def custom_query(query_body: dict, query: str):\n",
|
||||
" \"\"\"Custom query to be used in Elasticsearch.\n",
|
||||
" Args:\n",
|
||||
" query_body (dict): Elasticsearch query body.\n",
|
||||
" query (str): Query string.\n",
|
||||
" Returns:\n",
|
||||
" dict: Elasticsearch query body.\n",
|
||||
" \"\"\"\n",
|
||||
" print(\"Query Retriever created by the retrieval strategy:\")\n",
|
||||
" print(query_body)\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
" new_query_body = {\"query\": {\"match\": {\"text\": query}}}\n",
|
||||
"\n",
|
||||
" print(\"Query that's actually used in Elasticsearch:\")\n",
|
||||
" print(new_query_body)\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
" return new_query_body\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"results = db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\",\n",
|
||||
" k=4,\n",
|
||||
" custom_query=custom_query,\n",
|
||||
")\n",
|
||||
"print(\"Results:\")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
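{
"cell_type": "markdown",
"id": "custom-query-boosting-note",
"metadata": {},
"source": [
"As a sketch of the linear-boosting use case mentioned above, the `custom_query` below replaces the generated query with a `multi_match` that weights some fields more heavily. The field names (`text` and the hypothetical `metadata.title`) are assumptions; adjust them to your index mapping."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "custom-query-boosting-example",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: replace the generated query with a multi_match that boosts the main\n",
"# text field over a (hypothetical) metadata.title field.\n",
"def boosted_custom_query(query_body: dict, query: str) -> dict:\n",
"    return {\n",
"        \"query\": {\n",
"            \"multi_match\": {\n",
"                \"query\": query,\n",
"                # \"text^3\" gives the text field three times the weight\n",
"                \"fields\": [\"text^3\", \"metadata.title\"],\n",
"            }\n",
"        }\n",
"    }\n",
"\n",
"\n",
"results = db.similarity_search(\n",
"    \"What did the president say about Ketanji Brown Jackson\",\n",
"    k=4,\n",
"    custom_query=boosted_custom_query,\n",
")\n",
"print(results[0])"
]
},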
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15ebbe22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customize the Document Builder\n",
|
||||
"\n",
|
||||
"With the `doc_builder` parameter at search time, you can adjust how a `Document` is built from the data retrieved from Elasticsearch. This is especially useful if you have indices which were not created using LangChain.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4cf81750",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Dict\n",
|
||||
"\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def custom_document_builder(hit: Dict) -> Document:\n",
|
||||
" src = hit.get(\"_source\", {})\n",
|
||||
" return Document(\n",
|
||||
" page_content=src.get(\"content\", \"Missing content!\"),\n",
|
||||
" metadata={\n",
|
||||
" \"page_number\": src.get(\"page_number\", -1),\n",
|
||||
" \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"results = db.similarity_search(\n",
|
||||
" \"What did the president say about Ketanji Brown Jackson\",\n",
|
||||
" k=4,\n",
|
||||
" doc_builder=custom_document_builder,\n",
|
||||
")\n",
|
||||
"print(\"Results:\")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17b509ae",
|
||||
|
@ -8,20 +8,35 @@
|
||||
"# Vectara\n",
|
||||
"\n",
|
||||
"[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n",
|
||||
"\n",
|
||||
"Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n",
|
||||
"1. A way to extract text from files (PDF, PPT, DOCX, etc)\n",
|
||||
"2. ML-based chunking that provides state of the art performance.\n",
|
||||
"3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n",
|
||||
"4. Its own internal vector database where text chunks and embedding vectors are stored.\n",
|
||||
"5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments (including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n",
|
||||
"5. A query service that automatically encodes the query into an embedding and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), and the [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking).\n",
|
||||
"6. An LLM for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview) based on the retrieved documents (context), including citations.\n",
|
||||
"\n",
|
||||
"See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n",
|
||||
"For more information:\n",
|
||||
"- [Documentation](https://docs.vectara.com/docs/)\n",
|
||||
"- [API Playground](https://docs.vectara.com/docs/rest-api/)\n",
|
||||
"- [Quickstart](https://docs.vectara.com/docs/quickstart)\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the basic retrieval functionality, when utilizing Vectara just as a Vector Store (without summarization), including `similarity_search` and `similarity_search_with_score` as well as using the LangChain `as_retriever` functionality.\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration"
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `VectaraVectorStore` you first need to install the partner package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfdf03ba-d6f5-4b1e-86d3-a65c4bc99aa1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!uv pip install -U pip && uv pip install -qU langchain-vectara"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -32,14 +47,14 @@
|
||||
"# Getting Started\n",
|
||||
"\n",
|
||||
"To get started, use the following steps:\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n",
|
||||
"1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n",
|
||||
"2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n",
|
||||
"3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n",
|
||||
"\n",
|
||||
"To use LangChain with Vectara, you'll need to have these three values: `customer ID`, `corpus ID` and `api_key`.\n",
|
||||
"You can provide those to LangChain in two ways:\n",
|
||||
"To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n",
|
||||
"You can provide `VECTARA_API_KEY` to LangChain in two ways:\n",
|
||||
"\n",
|
||||
"1. Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n",
|
||||
"1. Include in your environment the variable `VECTARA_API_KEY`.\n",
|
||||
"\n",
|
||||
" For example, you can set these variables using os.environ and getpass as follows:\n",
|
||||
"\n",
|
||||
@ -47,8 +62,6 @@
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
@ -56,10 +69,8 @@
|
||||
"\n",
|
||||
"```python\n",
|
||||
"vectara = Vectara(\n",
|
||||
" vectara_customer_id=vectara_customer_id,\n",
|
||||
" vectara_corpus_id=vectara_corpus_id,\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
" )\n",
|
||||
" vectara_api_key=vectara_api_key\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"In this notebook we assume they are provided in the environment."
|
||||
@ -67,23 +78,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"id": "aac7a9a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<YOUR_VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_ID\"] = \"<YOUR_VECTARA_CORPUS_ID>\"\n",
|
||||
"os.environ[\"VECTARA_CUSTOMER_ID\"] = \"<YOUR_VECTARA_CUSTOMER_ID>\"\n",
|
||||
"os.environ[\"VECTARA_API_KEY\"] = \"<VECTARA_API_KEY>\"\n",
|
||||
"os.environ[\"VECTARA_CORPUS_KEY\"] = \"<VECTARA_CORPUS_KEY>\"\n",
|
||||
"\n",
|
||||
"from langchain_community.vectorstores import Vectara\n",
|
||||
"from langchain_community.vectorstores.vectara import (\n",
|
||||
" RerankConfig,\n",
|
||||
" SummaryConfig,\n",
|
||||
"from langchain_vectara import Vectara\n",
|
||||
"from langchain_vectara.vectorstores import (\n",
|
||||
" ChainReranker,\n",
|
||||
" CorpusConfig,\n",
|
||||
" CustomerSpecificReranker,\n",
|
||||
" File,\n",
|
||||
" GenerationConfig,\n",
|
||||
" MmrReranker,\n",
|
||||
" SearchConfig,\n",
|
||||
" VectaraQueryConfig,\n",
|
||||
")"
|
||||
")\n",
|
||||
"\n",
|
||||
"vectara = Vectara(vectara_api_key=os.getenv(\"VECTARA_API_KEY\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -91,21 +108,37 @@
|
||||
"id": "875ffb7e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we load the state-of-the-union text into Vectara. \n",
|
||||
"First we load the state-of-the-union text into Vectara.\n",
|
||||
"\n",
|
||||
"Note that we use the `from_files` interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n",
|
||||
"Note that we use the `add_files` interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n",
|
||||
"\n",
|
||||
"In this case it uses a `.txt` file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)."
|
||||
"In this case it uses a .txt file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "be0a4973",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['state_of_the_union.txt']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectara = Vectara.from_files([\"state_of_the_union.txt\"])"
|
||||
"corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n",
|
||||
"file_obj = File(\n",
|
||||
" file_path=\"../document_loaders/example_data/state_of_the_union.txt\",\n",
|
||||
" metadata={\"source\": \"text_file\"},\n",
|
||||
")\n",
|
||||
"vectara.add_files([file_obj], corpus_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -113,38 +146,52 @@
|
||||
"id": "22a6b953",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Vectara RAG (retrieval augmented generation)\n",
|
||||
"## Vectara RAG (retrieval augmented generation)\n",
|
||||
"\n",
|
||||
"We now create a `VectaraQueryConfig` object to control the retrieval and summarization options:\n",
|
||||
"* We enable summarization, specifying we would like the LLM to pick the top 7 matching chunks and respond in English\n",
|
||||
"* We enable MMR (max marginal relevance) in the retrieval process, with a 0.2 diversity bias factor\n",
|
||||
"* We want the top-10 results, with hybrid search configured with a value of 0.025\n",
|
||||
"\n",
|
||||
"Using this configuration, let's create a LangChain `Runnable` object that encapsulates the full Vectara RAG pipeline, using the `as_rag` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "9ecda054-96a8-4a91-aeae-32006efb1ac8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Biden addressed various topics in his statements. He highlighted the need to confront Putin by building a coalition of nations[1]. He also expressed commitment to investigating the impact of burn pits on soldiers' health, including his son's case[2]. Additionally, Biden outlined a plan to fight inflation by cutting prescription drug costs[3]. He emphasized the importance of continuing to combat COVID-19 and not just accepting living with it[4]. Furthermore, he discussed measures to weaken Russia economically and target Russian oligarchs[6]. Biden also advocated for passing the Equality Act to support LGBTQ+ Americans and condemned state laws targeting transgender individuals[7].\""
|
||||
"\"President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russian oligarchs, including closing American airspace to Russian flights and targeting their assets, as part of efforts to weaken Russia's economy [3], [7]. Additionally, he reaffirmed the need to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5].\""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang=\"eng\")\n",
|
||||
"rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.2)\n",
|
||||
"generation_config = GenerationConfig(\n",
|
||||
" max_used_search_results=7,\n",
|
||||
" response_language=\"eng\",\n",
|
||||
" generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n",
|
||||
" enable_factual_consistency_score=True,\n",
|
||||
")\n",
|
||||
"search_config = SearchConfig(\n",
|
||||
" corpora=[CorpusConfig(corpus_key=corpus_key)],\n",
|
||||
" limit=25,\n",
|
||||
" reranker=ChainReranker(\n",
|
||||
" rerankers=[\n",
|
||||
" CustomerSpecificReranker(reranker_id=\"rnk_272725719\", limit=100),\n",
|
||||
" MmrReranker(diversity_bias=0.2, limit=100),\n",
|
||||
" ]\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n",
|
||||
" search=search_config,\n",
|
||||
" generation=generation_config,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"query_str = \"what did Biden say?\"\n",
|
||||
@ -163,7 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "27f01330-8917-4eff-b603-59ab2571a4d2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -171,7 +218,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Biden addressed various topics in his statements. He highlighted the importance of building coalitions to confront global challenges [1]. He also expressed commitment to investigating the impact of burn pits on soldiers' health, including his son's case [2, 4]. Additionally, Biden outlined his plan to combat inflation by cutting prescription drug costs and reducing the deficit, with support from Nobel laureates and business leaders [3]. He emphasized the ongoing fight against COVID-19 and the need to continue combating the virus [5]. Furthermore, Biden discussed measures taken to weaken Russia's economic and military strength, targeting Russian oligarchs and corrupt leaders [6]. He also advocated for passing the Equality Act to support LGBTQ+ Americans and address discriminatory state laws [7]."
|
||||
"President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russia, including preventing its central bank from defending the Ruble and targeting Russian oligarchs' assets, as part of efforts to weaken Russia's economy and military [3]. Additionally, he reaffirmed the commitment to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5]. Lastly, he advocated for funding the police with necessary resources and training to ensure community safety [6]."
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -203,7 +250,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "b2e0aa2c-7c8e-4d79-8abc-66f5a1f961b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -211,19 +258,12 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Biden addressed various topics in his statements. He highlighted the need to confront Putin by building a coalition of nations[1]. He also expressed his commitment to investigating the impact of burn pits on soldiers' health, referencing his son's experience[2]. Additionally, Biden discussed his plan to fight inflation by cutting prescription drug costs and garnering support from Nobel laureates and business leaders[4]. Furthermore, he emphasized the importance of continuing to combat COVID-19 and not merely accepting living with the virus[5]. Biden's remarks encompassed international relations, healthcare challenges faced by soldiers, economic strategies, and the ongoing battle against the pandemic.\n",
|
||||
"Vectara FCS = 0.41796625\n"
|
||||
"President Biden discussed several key topics in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russian oligarchs, including closing American airspace to Russian flights and targeting their assets, as part of efforts to weaken Russia's economy [3], [7]. Additionally, he reaffirmed the need to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5].\n",
|
||||
"Vectara FCS = 0.61621094\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"summary_config = SummaryConfig(is_enabled=True, max_results=5, response_lang=\"eng\")\n",
|
||||
"rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.1)\n",
|
||||
"config = VectaraQueryConfig(\n",
|
||||
" k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"rag = vectara.as_rag(config)\n",
|
||||
"resp = rag.invoke(query_str)\n",
|
||||
"print(resp[\"answer\"])\n",
|
||||
"print(f\"Vectara FCS = {resp['fcs']}\")"
|
||||
@ -243,26 +283,28 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"id": "19cd2f86",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully. We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.', metadata={'lang': 'eng', 'section': '1', 'offset': '2160', 'len': '36', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n",
|
||||
" Document(page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.', metadata={'lang': 'eng', 'section': '1', 'offset': '34652', 'len': '60', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n",
|
||||
" Document(page_content='But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. Through her pain she found purpose to demand we do better. Tonight, Danielle—we are.', metadata={'lang': 'eng', 'section': '1', 'offset': '35442', 'len': '57', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'})]"
|
||||
"[Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"config.summary_config.is_enabled = False\n",
|
||||
"config.k = 3\n",
|
||||
"config.generation = None\n",
|
||||
"config.search.limit = 5\n",
|
||||
"retriever = vectara.as_retriever(config=config)\n",
|
||||
"retriever.invoke(query_str)"
|
||||
]
|
||||
@ -277,27 +319,34 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"id": "59268e9a-6089-4bb2-8c61-1ea6b956f83c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully. We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.', metadata={'lang': 'eng', 'section': '1', 'offset': '2160', 'len': '36', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n",
|
||||
" Document(page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.', metadata={'lang': 'eng', 'section': '1', 'offset': '34652', 'len': '60', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n",
|
||||
" Document(page_content='But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. Through her pain she found purpose to demand we do better. Tonight, Danielle—we are.', metadata={'lang': 'eng', 'section': '1', 'offset': '35442', 'len': '57', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n",
|
||||
" Document(page_content=\"Biden discussed various topics in his statements. He highlighted the importance of unity and preparation to confront challenges, such as building coalitions to address global issues [1]. Additionally, he shared personal stories about the impact of health issues on soldiers, including his son's experience with brain cancer possibly linked to burn pits [2]. Biden also outlined his plans to combat inflation by cutting prescription drug costs and emphasized the ongoing efforts to combat COVID-19, rejecting the idea of merely living with the virus [4, 5]. Overall, Biden's messages revolved around unity, healthcare challenges faced by soldiers, economic plans, and the ongoing fight against COVID-19.\", metadata={'summary': True, 'fcs': 0.54751414})]"
|
||||
"[Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='We won’t be able to compete for the jobs of the 21st Century if we don’t fix that. That’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. This was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. We’re done talking about infrastructure weeks. We’re going to have an infrastructure decade.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. We are joining with our European allies to find and seize your yachts your luxury apartments your private jets.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='It delivered immediate economic relief for tens of millions of Americans. Helped put food on their table, keep a roof over their heads, and cut the cost of health insurance. And as my Dad used to say, it gave people a little breathing room. And unlike the $2 Trillion tax cut passed in the previous administration that benefitted the top 1% of Americans, the American Rescue Plan helped working people—and left no one behind. Lots of jobs. \\n\\nIn fact—our economy created over 6.5 Million new jobs just last year, more jobs created in one year \\nthan ever before in the history of America.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='All told, we created 369,000 new manufacturing jobs in America just last year. Powered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” It’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. Inflation is robbing them of the gains they might otherwise feel.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did.'),\n",
|
||||
" Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. Through her pain she found purpose to demand we do better. Tonight, Danielle—we are. The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits.'),\n",
|
||||
" Document(metadata={'summary': True, 'fcs': (0.54785156,)}, page_content='President Biden spoke about several key issues. He emphasized the importance of the Bipartisan Infrastructure Law, calling it the most significant investment to rebuild America and highlighting it as a bipartisan effort [1]. He also announced measures against Russian oligarchs, including assembling a task force to seize their assets and closing American airspace to Russian flights, further isolating Russia economically [2]. Additionally, he expressed a commitment to investigating the health impacts of burn pits on military personnel, referencing his son, Major Beau Biden, who suffered from brain cancer [3].')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"config.summary_config.is_enabled = True\n",
|
||||
"config.k = 3\n",
|
||||
"config.generation = GenerationConfig()\n",
|
||||
"config.search.limit = 10\n",
|
||||
"retriever = vectara.as_retriever(config=config)\n",
|
||||
"retriever.invoke(query_str)"
|
||||
]
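A minimal sketch of how the combined result might be post-processed, assuming (as the output above suggests) that the Vectara-generated summary is returned as the final `Document` with `metadata["summary"]` set, and that `retriever` and `query_str` are defined as in the cell above:

```python
# Sketch only: separate the generated summary from the retrieved passages.
docs = retriever.invoke(query_str)

summary = next((d for d in docs if d.metadata.get("summary")), None)
passages = [d for d in docs if not d.metadata.get("summary")]

if summary is not None:
    print("Summary:", summary.page_content)
print(f"Retrieved {len(passages)} supporting passages")
```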
|
||||
@ -316,17 +365,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 14,
|
||||
"id": "e14325b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Biden's statement highlighted his efforts to unite freedom-loving nations against Putin's aggression, sharing information in advance to counter Russian lies and hold Putin accountable[1]. Additionally, he emphasized his commitment to military families, like Danielle Robinson, and outlined plans for more affordable housing, Pre-K for 3- and 4-year-olds, and ensuring no additional taxes for those earning less than $400,000 a year[2][3]. The statement also touched on the readiness of the West and NATO to respond to Putin's actions, showcasing extensive preparation and coalition-building efforts[4]. Heath Robinson's story, a combat medic who succumbed to cancer from burn pits, was used to illustrate the resilience and fight for better conditions[5].\""
|
||||
"'The remarks made by Biden include his emphasis on the importance of the Bipartisan Infrastructure Law, which he describes as the most significant investment to rebuild America in history. He highlights the bipartisan effort involved in passing this law and expresses gratitude to members of both parties for their collaboration. Biden also mentions the transition from \"infrastructure weeks\" to an \"infrastructure decade\" [1]. Additionally, he shares a personal story about his father having to leave their home in Scranton, Pennsylvania, to find work, which influenced his decision to fight for the American Rescue Plan to help those in need [2].'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -371,7 +420,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -39,6 +39,7 @@
|
||||
"\n",
|
||||
"To install LangChain run:\n",
|
||||
"\n",
|
||||
"<!-- HIDE_IN_NB\n",
|
||||
"import Tabs from '@theme/Tabs';\n",
|
||||
"import TabItem from '@theme/TabItem';\n",
|
||||
"import CodeBlock from \"@theme/CodeBlock\";\n",
|
||||
@ -51,9 +52,28 @@
|
||||
" <CodeBlock language=\"bash\">conda install langchain -c conda-forge</CodeBlock>\n",
|
||||
" </TabItem>\n",
|
||||
"</Tabs>\n",
|
||||
"HIDE_IN_NB -->"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "86874822",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# %pip install langchain\n",
|
||||
"# OR\n",
|
||||
"# %conda install langchain -c conda-forge"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a546a5bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more details, see our [Installation guide](/docs/how_to/installation).\n",
|
||||
"\n",
|
||||
"### LangSmith\n",
|
||||
@ -67,17 +87,45 @@
|
||||
"```shell\n",
|
||||
"export LANGSMITH_TRACING=\"true\"\n",
|
||||
"export LANGSMITH_API_KEY=\"...\"\n",
|
||||
"export LANGSMITH_PROJECT=\"default\" # or any other project name\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Or, if in a notebook, you can set them with:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"Or, if in a notebook, you can set them with:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "599bb688",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # load environment variables from .env file (requires `python-dotenv`)\n",
|
||||
" from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
" load_dotenv()\n",
|
||||
"except ImportError:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n",
|
||||
"```"
|
||||
"if \"LANGSMITH_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\n",
|
||||
" prompt=\"Enter your LangSmith API key (optional): \"\n",
|
||||
" )\n",
|
||||
"if \"LANGSMITH_PROJECT\" not in os.environ:\n",
|
||||
" os.environ[\"LANGSMITH_PROJECT\"] = getpass.getpass(\n",
|
||||
" prompt='Enter your LangSmith Project Name (default = \"default\"): '\n",
|
||||
" )\n",
|
||||
" if not os.environ.get(\"LANGSMITH_PROJECT\"):\n",
|
||||
" os.environ[\"LANGSMITH_PROJECT\"] = \"default\"\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\n",
|
||||
" prompt=\"Enter your OpenAI API key (required if using OpenAI): \"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -89,9 +137,11 @@
|
||||
"\n",
|
||||
"First up, let's learn how to use a language model by itself. LangChain supports many different language models that you can use interchangeably. For details on getting started with a specific model, refer to [supported integrations](/docs/integrations/chat/).\n",
|
||||
"\n",
|
||||
"<!-- HIDE_IN_NB>\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs overrideParams={{openai: {model: \"gpt-4o-mini\"}}} />\n"
|
||||
"<ChatModelTabs overrideParams={{openai: {model: \"gpt-4o-mini\"}}} />\n",
|
||||
"HIDE_IN_NB -->"
|
||||
]
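For readers running the notebook directly, one way to instantiate the model referenced above is the `init_chat_model` helper; this is a sketch assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set:

```python
# Sketch: instantiate the chat model used in this tutorial via init_chat_model.
from langchain.chat_models import init_chat_model

model = init_chat_model("gpt-4o-mini", model_provider="openai")
print(model.invoke("Hello, world!").content)
```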
|
||||
},
|
||||
{
|
||||
|
@ -9,9 +9,12 @@ import nbformat
|
||||
from nbconvert.exporters import MarkdownExporter
|
||||
from nbconvert.preprocessors import Preprocessor
|
||||
|
||||
HIDE_IN_NB_MAGIC_OPEN = "<!-- HIDE_IN_NB"
|
||||
HIDE_IN_NB_MAGIC_CLOSE = "HIDE_IN_NB -->"
|
||||
|
||||
|
||||
class EscapePreprocessor(Preprocessor):
|
||||
def preprocess_cell(self, cell, resources, cell_index):
|
||||
def preprocess_cell(self, cell, resources, index):
|
||||
if cell.cell_type == "markdown":
|
||||
# rewrite .ipynb links to .md
|
||||
cell.source = re.sub(
|
||||
@ -61,7 +64,7 @@ class ExtractAttachmentsPreprocessor(Preprocessor):
|
||||
outputs are returned in the 'resources' dictionary.
|
||||
"""
|
||||
|
||||
def preprocess_cell(self, cell, resources, cell_index):
|
||||
def preprocess_cell(self, cell, resources, index):
|
||||
"""
|
||||
Apply a transformation on each cell,
|
||||
Parameters
|
||||
@ -117,11 +120,19 @@ class CustomRegexRemovePreprocessor(Preprocessor):
|
||||
return nb, resources
|
||||
|
||||
|
||||
class UnHidePreprocessor(Preprocessor):
|
||||
def preprocess_cell(self, cell, resources, index):
|
||||
cell.source = cell.source.replace(HIDE_IN_NB_MAGIC_OPEN, "")
|
||||
cell.source = cell.source.replace(HIDE_IN_NB_MAGIC_CLOSE, "")
|
||||
return cell, resources
|
||||
|
||||
|
||||
exporter = MarkdownExporter(
|
||||
preprocessors=[
|
||||
EscapePreprocessor,
|
||||
ExtractAttachmentsPreprocessor,
|
||||
CustomRegexRemovePreprocessor,
|
||||
UnHidePreprocessor,
|
||||
],
|
||||
template_name="mdoutput",
|
||||
extra_template_basedirs=["./scripts/notebook_convert_templates"],
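As a standalone illustration (not part of the script) of what the `HIDE_IN_NB` markers do: the preprocessor simply strips the marker strings, so content that notebooks treat as an HTML comment becomes visible in the exported Markdown.

```python
# Illustration of the marker stripping performed by UnHidePreprocessor.
HIDE_IN_NB_MAGIC_OPEN = "<!-- HIDE_IN_NB"
HIDE_IN_NB_MAGIC_CLOSE = "HIDE_IN_NB -->"

cell_source = (
    "To install LangChain run:\n"
    "<!-- HIDE_IN_NB\n"
    '<CodeBlock language="bash">pip install langchain</CodeBlock>\n'
    "HIDE_IN_NB -->"
)

exported = cell_source.replace(HIDE_IN_NB_MAGIC_OPEN, "").replace(
    HIDE_IN_NB_MAGIC_CLOSE, ""
)
print(exported)  # the <CodeBlock> line is now visible in the Markdown export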
|
||||
|
436
docs/static/llms.txt
vendored
Normal file
@ -0,0 +1,436 @@
|
||||
# LangChain
|
||||
|
||||
## High level
|
||||
|
||||
[Why LangChain?](https://python.langchain.com/docs/concepts/why_langchain/): considering using LangChain, building complex AI applications, or needing to evaluate AI applications. This page discusses the main reasons to use LangChain: standardized component interfaces, orchestration capabilities, and observability/evaluation through LangSmith.
|
||||
[Architecture](https://python.langchain.com/docs/concepts/architecture/): needing an overview of the LangChain architecture, exploring the various packages and components, or deciding which parts to use for a specific application. Provides a high-level overview of the different packages that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith.
|
||||
|
||||
## Concepts
|
||||
|
||||
[Chat Models](https://python.langchain.com/docs/concepts/chat_models/): building applications using chat models, learning about chat model interfaces and features, or interested in integrating chat models with external tools and services. Provides an overview of chat models in LangChain, including their features, integration options, interfaces, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate-limiting and caching.
|
||||
[Messages](https://python.langchain.com/docs/concepts/messages/): querying LangChain's chat message format, understanding different message types, building chat applications. Messages are the unit of communication in chat models, representing input/output with roles, content, metadata. Covers SystemMessage, HumanMessage, AIMessage, AIMessageChunk, ToolMessage, RemoveMessage, and legacy FunctionMessage.
|
||||
[Chat history](https://python.langchain.com/docs/concepts/chat_history/): dealing with chat history, managing chat context, or understanding conversation patterns. Covers chat history structure, conversation patterns between user/assistant/tools, and guidelines for managing chat history to stay within context window.
|
||||
[Tools](https://python.langchain.com/docs/concepts/tools/): needing an overview of tools in LangChain, wanting to create custom tools, or learning how to pass runtime values to tools. Tools are a way to encapsulate functions with schemas that can be passed to chat models supporting tool calling. The page covers the tool interface, creating tools using the @tool decorator, configuring tool schemas, tool artifacts, special type annotations like InjectedToolArg, and toolkits.
|
||||
[tool calling](https://python.langchain.com/docs/concepts/tool_calling/): needing to understand how to enable tool calling functionality, how to create tools from functions, how to bind tools to a model that supports tool calling. The page covers the key concepts of tool calling, including tool creation using decorators, tool binding to models, tool calling by models, and tool execution. It provides an overview, recommended usage, and best practices.
|
||||
[structured outputs](https://python.langchain.com/docs/concepts/structured_outputs/): it needs to return output in a structured format, when working with databases or APIs that require structured data, or when building applications with structured responses. Covers structured output concepts like schema definition and methods like tool calling and JSON mode, as well as helper functions, to instruct models to produce structured outputs conforming to a given schema.
|
||||
[Memory](https://langchain-ai.github.io/langgraph/concepts/memory/): developing agents with memory capabilities, implementing memory management strategies, or learning about different types of memory for AI agents. Covers topics related to short-term and long-term memory for agents, techniques for managing conversation history and summarizing past conversations, different types of memory (semantic, episodic, procedural), and approaches for writing memories in the hot path or in the background.
|
||||
[Multimodality](https://python.langchain.com/docs/concepts/multimodality/): needing to understand multimodal capabilities, using chat models with multimodal inputs, or using multimodal retrieval/embeddings. Discusses ability of LangChain components like chat models, embedding models, and vector stores to handle multimodal data like text, images, audio, video. Covers current status and limitations around multimodal inputs and outputs for chat models.
|
||||
[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, when working with custom Runnables, and when needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables.
|
||||
[stream](https://python.langchain.com/docs/concepts/streaming/): [building applications that use streaming, building applications that need to display partial results in real-time, building applications that need to provide updates on pipeline or workflow progress] 'This page covers streaming in LangChain, including what can be streamed in LLM applications, the streaming APIs available, how to write custom data to the stream, and how LangChain automatically enables streaming for chat models in certain cases.'
|
||||
[LCEL](https://python.langchain.com/docs/concepts/lcel/): needing an overview of the LangChain Expression Language (LCEL), deciding whether to use LCEL or not, and understanding how to compose chains using LCEL primitives. Provides an overview of the LCEL, a declarative approach to building chains from existing Runnables, covering its benefits, composition primitives like RunnableSequence and RunnableParallel, the composition syntax, automatic type coercion, and guidance on when to use LCEL versus alternatives like LangGraph.
|
||||
[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders/): needing to load data from various sources like files, webpages, or databases, or when handling large datasets with lazy loading. Document loaders help load data from different sources into a standardized Document object format, with options for lazy loading of large datasets.
|
||||
[Retrieval](https://python.langchain.com/docs/concepts/retrieval/): building retrieval systems, understanding query analysis, integrating with databases This page covers key concepts and techniques in retrieval systems, including query analysis (re-writing and construction), vector and lexical indexes, databases, and LangChain's unified retriever interface.
|
||||
[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters/): working with long documents, handling limited model input sizes, or optimizing retrieval systems This page discusses different strategies for splitting large texts into smaller chunks, including length-based, text structure-based, document structure-based, and semantic meaning-based approaches.
|
||||
[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models/): LLM should read this page when: 1) Working with text embeddings for search/retrieval 2) Comparing text similarity using embedding vectors 3) Selecting or integrating text embedding models It covers key concepts of embedding models: converting text to numerical vectors, measuring similarity between vectors, embedding models (historical context, interface, integrations), and common similarity metrics (cosine, Euclidean, dot product).
|
||||
[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): LLM should read this page when: 1) Building applications that need to index and retrieve information based on semantic similarity 2) Integrating vector databases into their application 3) Exploring advanced vector search and retrieval techniques Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering.
|
||||
[Retrievers](https://python.langchain.com/docs/concepts/retrievers/): building a retrieval system, integrating different retrieval sources, or linking retrieved information to source documents. This page outlines the retriever interface in LangChain, common types of retrievers such as vector stores and search APIs, and advanced retrieval patterns like ensembling and retaining source document information.
|
||||
[Retrieval Augmented Generation (RAG)](https://python.langchain.com/docs/concepts/rag/): developing applications that incorporate retrieval and generation, building question-answering systems with external data sources, or optimizing knowledge retrieval and integration into language models. Covers the concept of Retrieval Augmented Generation (RAG), which combines retrieval systems with language models to utilize external knowledge, access up-to-date information, leverage domain-specific expertise, reduce hallucination, and integrate knowledge cost-effectively.
|
||||
[Agents](https://python.langchain.com/docs/concepts/agents/): building AI agents or systems that take high-level tasks and perform a series of actions to accomplish them, transitioning from the legacy AgentExecutor to the newer and more flexible LangGraph system. Provides an overview of agents in LangChain, the legacy AgentExecutor concept, resources for using AgentExecutor, and guidance on migrating to the preferred LangGraph architecture for building customizable agents.
|
||||
[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates/): creating prompts for language models, formatting chat messages, slotting messages into specific locations in a prompt. This page covers different types of prompt templates (string, chat, messages placeholder) for formatting prompts for language models and chat models.
|
||||
[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers/): looking for ways to extract structured data from model outputs, parsing model outputs into different formats, or handling errors in parsing. Covers various LangChain output parsers like JSON, XML, CSV, Pandas DataFrame, along with capabilities like output fixing, retrying, and using user-defined formats.
|
||||
[Few-shot prompting](https://python.langchain.com/docs/concepts/few_shot_prompting/): needing to improve model performance, when deciding how to format few-shot examples, when selecting examples for few-shot prompting The page covers generating examples, number of examples, selecting examples, and formatting examples for few-shot prompting with language models.
|
||||
[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors/): selecting examples for few-shot prompting, dynamically choosing examples for prompts, or understanding different example selection techniques. The page covers example selectors, which are classes responsible for selecting and formatting examples to include as part of prompts for improved performance with few-shot learning.
|
||||
[Async programming](https://python.langchain.com/docs/concepts/async/): building asynchronous applications with LangChain, working with async runnables, or handling async API calls. Explains LangChain's asynchronous APIs, delegation to sync methods, performance considerations, compatibility with asyncio, and usage in Jupyter notebooks.
|
||||
[Callbacks](https://python.langchain.com/docs/concepts/callbacks/): [needing to log, monitor, or stream events in an LLM application] [This page covers LangChain's callback system, which allows hooking into various stages of an LLM application for logging, monitoring, streaming, and other purposes. It explains the different callback events, callback handlers, and how to pass callbacks.]
|
||||
[Tracing](https://python.langchain.com/docs/concepts/tracing/): tracing the steps of a chain/agent for debugging, understanding the chain's flow, or inspecting intermediary outputs. Discusses the concept of tracing in LangChain, including that traces contain runs which are individual steps, and that tracing provides observability into chains/agents.
|
||||
[Evaluation](https://python.langchain.com/docs/concepts/evaluation/): evaluating the performance of LLM-powered applications, creating or curating datasets, defining metrics for evaluation This page covers the concept of evaluation in LangChain, including using LangSmith to create datasets, define metrics, track results over time, and run evaluations automatically.
|
||||
[Testing](https://python.langchain.com/docs/concepts/testing/): testing LangChain components, implementing unit tests, or setting up integration tests This page explains unit tests, integration tests, and standard tests in LangChain, including code examples
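A minimal sketch of the composition style several of the entries above describe (LCEL plus an output parser), assuming a chat model integration such as `langchain-openai` is installed and configured:

```python
# Sketch: composing a prompt, model, and output parser with LCEL's `|` operator.
# Assumes langchain-openai is installed and OPENAI_API_KEY is set.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Explain {topic} in one sentence.")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

print(chain.invoke({"topic": "retrieval augmented generation"}))
```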
|
||||
|
||||
## How-to guides
|
||||
|
||||
### Installation
|
||||
|
||||
[How to: install LangChain packages](https://python.langchain.com/docs/how_to/installation/): installing LangChain packages, learning about the LangChain ecosystem packages, installing specific ecosystem packages This page explains how to install the main LangChain package, as well as different ecosystem packages like langchain-core, langchain-community, langchain-openai, langchain-experimental, langgraph, langserve, langchain-cli, and langsmith SDK.
|
||||
[How to: use LangChain with different Pydantic versions](https://python.langchain.com/docs/how_to/pydantic_compatibility/): needing to use LangChain with different Pydantic versions, needing to install Pydantic 2 with LangChain, or avoiding using the pydantic.v1 namespace with LangChain APIs. The page explains that LangChain 0.3 uses Pydantic 2 internally and advises users to install Pydantic 2 and avoid using the pydantic.v1 namespace with LangChain APIs.
|
||||
[How to: return structured data from a model](https://python.langchain.com/docs/how_to/structured_output/): LLM should read this page when: 1) wanting to return structured data from a model, 2) building applications that require structured outputs, 3) exploring techniques for parsing model outputs into objects or schemas. This page covers methods for obtaining structured outputs from language models, including using .with_structured_output(), prompting techniques with output parsers, and handling complex schemas with few-shot examples.
|
||||
[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects.
|
||||
[How to: stream runnables](https://python.langchain.com/docs/how_to/streaming/): wanting to learn how to stream LLM responses, stream intermediate steps, and configure streaming events. This page covers how to use the `stream` and `astream` methods to stream final outputs, how to use `astream_events` to stream both final outputs and intermediate steps, filtering events, propagating callbacks for streaming, and working with input streams.
|
||||
[How to: debug your LLM apps](https://python.langchain.com/docs/how_to/debugging/): debugging LLM applications, adding print statements, or logging events for tracing. Covers setting verbose mode to print important events, debug mode to print all events, and using LangSmith for visualizing event traces.
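A short sketch of the structured-output pattern referenced above, assuming a chat model that supports `.with_structured_output()` (here `langchain-openai` is used as an example, with `OPENAI_API_KEY` set):

```python
# Sketch: returning structured data from a model with .with_structured_output().
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Joke(BaseModel):
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")


structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(Joke)
print(structured_llm.invoke("Tell me a joke about cats"))
```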
|
||||
|
||||
### Components
|
||||
|
||||
These are the core building blocks you can use when building applications.
|
||||
|
||||
#### Chat models
|
||||
|
||||
[Chat Models](https://python.langchain.com/docs/concepts/chat_models/): building applications using chat models, learning about chat model interfaces and features, or interested in integrating chat models with external tools and services. Provides an overview of chat models in LangChain, including their features, integration options, interfaces, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate-limiting and caching.
|
||||
[here](https://python.langchain.com/docs/integrations/chat/): integrating chat models into an application, using chat models for conversational AI tasks, or choosing between different chat model providers. Provides an overview of chat models integrated with LangChain, including OpenAI, Anthropic, Google, and others. Covers key features like tool calling, structured output, JSON mode, local usage, and multimodal support.
|
||||
|
||||
[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects.
|
||||
[How to: get models to return structured output](https://python.langchain.com/docs/how_to/structured_output/): wanting to obtain structured output from an LLM, needing to parse JSON/XML/YAML output from an LLM, or looking to use few-shot examples with structured outputs. This page covers using the `.with_structured_output()` method to obtain structured data from LLMs, prompting techniques to elicit structured outputs, and parsing structured outputs.
|
||||
[How to: cache model responses](https://python.langchain.com/docs/how_to/chat_model_caching/): needing to cache ChatModel responses for efficiency, needing to reduce API calls for cost savings, or during development. This page covers how to use an in-memory cache or a SQLite database for caching ChatModel responses, which can improve performance and reduce costs.
|
||||
[How to: get log probabilities](https://python.langchain.com/docs/how_to/logprobs/): seeking to get token-level log probabilities from OpenAI chat models, or needing to understand how log probabilities are represented in LangChain. Explains how to configure OpenAI chat models to return token log probabilities, and how these are included in the response metadata and streamed responses.
|
||||
[How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model/): creating a custom chat model class, integrating a new language model as a chat model, or implementing streaming for a chat model. This page explains how to create a custom chat model class by inheriting from BaseChatModel, and implementing methods like _generate and _stream. It covers handling inputs, messages, streaming, identifying parameters, and contributing custom chat models.
|
||||
[How to: stream a response back](https://python.langchain.com/docs/how_to/chat_streaming/): LLM should read this page when: 1) It needs to stream chat model responses token-by-token 2) It needs to understand how to use the astream() and astream_events() methods for chat models 3) It wants to see examples of streaming chat model responses synchronously and asynchronously This page explains how to stream chat model responses token-by-token using the astream() and astream_events() methods, and provides examples for synchronous and asynchronous streaming with chat models that support this feature.
|
||||
[How to: track token usage](https://python.langchain.com/docs/how_to/chat_token_usage_tracking/): tracking token usage for chat models, determining costs of using chat models, implementing token usage tracking in applications. Provides methods to track token usage from OpenAI and Anthropic chat models through AIMessage.usage_metadata, callbacks, and using LangSmith. Covers streaming token usage and aggregating usage across multiple calls.
|
||||
[How to: track response metadata across providers](https://python.langchain.com/docs/how_to/response_metadata/): needing to access metadata from model responses, wanting to get information like token usage or log probabilities, or checking safety ratings Explains how to access response metadata from various chat model providers like OpenAI, Anthropic, Vertex AI, etc. Shows code examples of retrieving metadata like token usage, log probabilities, and safety ratings.
|
||||
[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects.
|
||||
[How to: stream tool calls](https://python.langchain.com/docs/how_to/tool_streaming/): wanting to stream tool calls, needing to handle partial tool call data, or needing to accumulate tool call chunks. This page explains how to stream tool calls, merge message chunks to accumulate tool call chunks, and parse tool calls from accumulated chunks, with code examples.
|
||||
[How to: handle rate limits](https://python.langchain.com/docs/how_to/chat_model_rate_limiting/): handling rate limits from model providers, running many parallel queries to a model, benchmarking a chat model. The page explains how to initialize and use an in-memory rate limiter with chat models to limit the number of requests made per unit time.
|
||||
[How to: few shot prompt tool behavior](https://python.langchain.com/docs/how_to/tools_few_shot/): using few-shot examples to improve tool calling, demonstrating how to incorporate example queries and responses into the prompt. The page explains how to create few-shot prompts including examples of tool usage, allowing the model to learn from these demonstrations to improve its ability to correctly call tools for math operations or other tasks.
|
||||
[How to: bind model-specific formatted tools](https://python.langchain.com/docs/how_to/tools_model_specific/): binding model-specific tools, binding OpenAI tool schemas, invoking model-specific tools This page explains how to bind model-specific tool schemas directly to an LLM, with an example using the OpenAI tool schema format.
|
||||
[How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice/): needing to force an LLM to call a specific tool, needing to force an LLM to call at least one tool This page shows how to use the tool_choice parameter to force an LLM to call a specific tool or to call at least one tool from a set of available tools.
|
||||
[How to: work with local models](https://python.langchain.com/docs/how_to/local_llms/): [running LLMs locally on a user's device, using open-source LLMs, utilizing custom prompts with LLMs] [Overview of open-source LLMs and frameworks for running inference locally, instructions for setting up and using local LLMs (Ollama, llama.cpp, GPT4All, llamafile), guidance on formatting prompts for specific LLMs, potential use cases for local LLMs.]
|
||||
[How to: init any model in one line](https://python.langchain.com/docs/how_to/chat_models_universal_init/): initializing chat models for different model providers, creating a configurable chat model, inferring the model provider from the model name. The page explains how to initialize any LLM chat model integration in one line using the init_chat_model() helper, create a configurable chat model with default or custom parameters, and infer the model provider based on the model name.
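A compact sketch tying several of the guides above together (tool definition, binding, and forced tool choice), assuming an OpenAI chat model with `OPENAI_API_KEY` set:

```python
# Sketch: define a tool, bind it to a chat model, and inspect the tool calls.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools(
    [multiply], tool_choice="multiply"  # force the model to call `multiply`
)
ai_msg = llm_with_tools.invoke("What is 6 times 7?")
print(ai_msg.tool_calls)  # e.g. [{'name': 'multiply', 'args': {'a': 6, 'b': 7}, ...}]
```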
|
||||
|
||||
#### Messages
|
||||
|
||||
[Messages](https://python.langchain.com/docs/concepts/messages/): querying LangChain's chat message format, understanding different message types, building chat applications. Messages are the unit of communication in chat models, representing input/output with roles, content, metadata. Covers SystemMessage, HumanMessage, AIMessage, AIMessageChunk, ToolMessage, RemoveMessage, and legacy FunctionMessage.
|
||||
|
||||
[How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/): working with long chat histories, when concerned about token limits for chat models, when implementing token management strategies. This page explains how to use the trim_messages utility to reduce the size of a chat message history to fit within token limits, covering trimming by token count or message count, and allowing customization of trimming strategies.
|
||||
[How to: filter messages](https://python.langchain.com/docs/how_to/filter_messages/): needing to filter messages by type, id, or name when working with message histories, when using chains/agents that pass message histories between components. Provides instructions and examples for filtering message lists (e.g. to only include human messages) using the filter_messages utility, including basic usage, chaining with models, and API reference.
|
||||
[How to: merge consecutive messages of the same type](https://python.langchain.com/docs/how_to/merge_message_runs/): it needs to merge consecutive messages of the same type for a particular model, when it wants to compose the merge_message_runs utility with other components in a chain, or when it needs to invoke the merge_message_runs utility imperatively. The page explains how to use the merge_message_runs utility to merge consecutive messages of the same type, provides examples of using it in chains or invoking it directly, and links to the API reference for more details.
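A small, self-contained sketch of the message-management utilities listed above; `token_counter=len` simply counts messages so no model or tokenizer is needed:

```python
# Sketch: trimming a chat history with trim_messages (counting messages, not tokens).
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages

history = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("Hi, I'm Bob."),
    AIMessage("Hello Bob!"),
    HumanMessage("What's my name?"),
    AIMessage("Your name is Bob."),
]

trimmed = trim_messages(
    history,
    strategy="last",      # keep the most recent messages
    token_counter=len,    # count messages instead of tokens
    max_tokens=3,         # keep at most 3 messages in total
    include_system=True,  # always keep the SystemMessage
    start_on="human",     # ensure the kept window starts on a human turn
)
print(trimmed)
```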
|
||||
|
||||
#### Prompt templates
|
||||
|
||||
[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates/): creating prompts for language models, formatting chat messages, slotting messages into specific locations in a prompt. This page covers different types of prompt templates (string, chat, messages placeholder) for formatting prompts for language models and chat models.
|
||||
|
||||
[How to: use few shot examples](https://python.langchain.com/docs/how_to/few_shot_examples/): creating few-shot prompts, using example selectors, providing examples to large language models This page explains how to use few-shot examples to provide context to language models, including creating formatters, constructing example sets, using example selectors like SemanticSimilarityExampleSelector, and creating FewShotPromptTemplates.
|
||||
[How to: use few shot examples in chat models](https://python.langchain.com/docs/how_to/few_shot_examples_chat/): LLM should read this page when: 1) wanting to provide a few-shot example to fine-tune a chat model's output, 2) needing to dynamically select examples from a larger set based on semantic similarity to the input This page covers how to provide few-shot examples to chat models using either fixed examples or dynamically selecting examples from a vectorstore based on semantic similarity to the input.
|
||||
[How to: partially format prompt templates](https://python.langchain.com/docs/how_to/prompts_partial/): needing to partially format prompt templates, wanting to pass partial strings to templates, or needing to pass functions returning strings to templates. Explains how to partially format prompt templates by passing in a subset of required values as strings or functions that return strings, to create a new template expecting only remaining values.
|
||||
[How to: compose prompts together](https://python.langchain.com/docs/how_to/prompts_composition/): needing to compose prompts from various prompt components, working with chat prompts, or using the PipelinePromptTemplate class. This page explains how to concatenate different prompt templates together to build larger prompts, covering both string prompts and chat prompts, as well as using the PipelinePromptTemplate to reuse prompt components.
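A brief sketch of composing and partially formatting a chat prompt, as described in the entries above:

```python
# Sketch: build a chat prompt, partially format it, then fill in the rest.
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a {tone} assistant."),
        ("human", "{question}"),
    ]
)

# Fix the tone now, leave the question for later.
friendly_prompt = prompt.partial(tone="friendly")
messages = friendly_prompt.invoke({"question": "What is LangChain?"}).to_messages()
print(messages)
```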
|
||||
|
||||
#### Example selectors
|
||||
|
||||
[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors/): selecting examples for few-shot prompting, dynamically choosing examples for prompts, or understanding different example selection techniques. The page covers example selectors, which are classes responsible for selecting and formatting examples to include as part of prompts for improved performance with few-shot learning.
|
||||
|
||||
[How to: use example selectors](https://python.langchain.com/docs/how_to/example_selectors/): needing to select example prompts for few-shot learning, when having many examples to choose from, or when creating a custom example selector. Explains how to use example selectors in LangChain to select which examples to include in a prompt, covering built-in selectors like similarity and providing a custom example selector.
|
||||
[How to: select examples by length](https://python.langchain.com/docs/how_to/example_selectors_length_based/): selecting examples for few-shot prompting, handling long examples that may exceed context window, and dynamically including the appropriate number of examples. This page explains how to use the LengthBasedExampleSelector to select examples based on their length, including fewer examples for longer inputs to avoid exceeding the context window.
|
||||
[How to: select examples by semantic similarity](https://python.langchain.com/docs/how_to/example_selectors_similarity/): selecting relevant examples for few-shot prompting, building example-based systems, finding relevant reference cases This page covers how to select examples by similarity to the input using embedding-based semantic search over a vector store.
|
||||
[How to: select examples by semantic ngram overlap](https://python.langchain.com/docs/how_to/example_selectors_ngram/): selecting relevant examples to include in few-shot prompts, determining relevancy through n-gram overlap scores, and customizing example selection thresholds. Explains how to use the NGramOverlapExampleSelector to select and order examples based on n-gram overlap with the input text, including setting thresholds and dynamically adding examples.
|
||||
[How to: select examples by maximal marginal relevance](https://python.langchain.com/docs/how_to/example_selectors_mmr/): needing to select few-shot examples optimizing for both similarity to inputs and diversity from each other, working with example-based prompting for fewshot learning. Demonstrates how to use the MaxMarginalRelevanceExampleSelector, which selects examples by maximizing relevance to inputs while also optimizing for diversity between selected examples, contrasting it with just selecting by similarity.
|
||||
[How to: select examples from LangSmith few-shot datasets](https://python.langchain.com/docs/how_to/example_selectors_langsmith/): [learning how to use LangSmith datasets for few-shot example selection, dynamically creating few-shot prompts from LangSmith data, integrating LangSmith with LangChain chains] [The page covers setting up LangSmith, querying LangSmith datasets for similar examples, and using those examples in a LangChain chain to create dynamic few-shot prompts for chat models.]
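A self-contained sketch of one of the selectors above (selecting by length), chosen here because it needs no embedding model or API key:

```python
# Sketch: choose fewer few-shot examples when the input is long.
from langchain_core.example_selectors import LengthBasedExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "energetic", "output": "lethargic"},
]
example_prompt = PromptTemplate.from_template("Input: {input}\nOutput: {output}")

selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=15,  # rough word budget shared by the examples and the input
)

few_shot = FewShotPromptTemplate(
    example_selector=selector,
    example_prompt=example_prompt,
    prefix="Give the antonym of every input.",
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"],
)
print(few_shot.format(adjective="big"))
```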
|
||||
|
||||
#### LLMs
|
||||
|
||||
[LLMs](https://python.langchain.com/docs/concepts/text_llms/): needing an overview of string-based language models, learning about legacy models in LangChain, or comparing string-based models to chat models. Covers LangChain's support for older language models that take strings as input and output, distinguishing them from newer chat models; advises using chat models where possible.
|
||||
|
||||
[How to: cache model responses](https://python.langchain.com/docs/how_to/llm_caching/): it needs to cache responses to save money and time, learn about caching in LangChain. LangChain provides an optional caching layer for LLMs to save money and time by reducing API calls for repeated requests. Examples show caching with InMemoryCache and SQLiteCache.
|
||||
[How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm/): creating a custom LLM class, wrapping their own LLM provider, integrating with a new language model not yet supported by LangChain. This page explains how to create a custom LLM class by implementing the required _call and _llm_type methods, as well as optional methods like _identifying_params, _acall, _stream, and _astream. It provides an example implementation, demonstrates testing and integration with LangChain APIs, and offers guidance for contributing custom LLM integrations.
|
||||
[How to: stream a response back](https://python.langchain.com/docs/how_to/streaming_llm/): it needs to stream responses from an LLM, when it needs to work with async streaming from LLMs, when it needs to stream events from an LLM. This page shows how to stream responses token-by-token from LLMs using both sync and async methods, as well as how to stream events from LLMs asynchronously.
|
||||
[How to: track token usage](https://python.langchain.com/docs/how_to/llm_token_usage_tracking/): tracking token usage for LLM calls, managing costs for an LLM application, or calculating costs based on token counts. The page covers how to track token usage using LangSmith, OpenAI callback handlers, and handling streaming contexts; it also summarizes limitations with legacy models for streaming.
|
||||
[How to: work with local models](https://python.langchain.com/docs/how_to/local_llms/): [running LLMs locally on a user's device, using open-source LLMs, utilizing custom prompts with LLMs] [Overview of open-source LLMs and frameworks for running inference locally, instructions for setting up and using local LLMs (Ollama, llama.cpp, GPT4All, llamafile), guidance on formatting prompts for specific LLMs, potential use cases for local LLMs.]
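A small sketch of the response caching described above, assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set:

```python
# Sketch: cache LLM responses in memory so repeated prompts skip the API call.
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_openai import OpenAI

set_llm_cache(InMemoryCache())
llm = OpenAI(model="gpt-3.5-turbo-instruct")

print(llm.invoke("Tell me a joke"))  # first call hits the API
print(llm.invoke("Tell me a joke"))  # second call is served from the cache
```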
|
||||
|
||||
#### Output parsers
|
||||
|
||||
[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers/): looking for ways to extract structured data from model outputs, parsing model outputs into different formats, or handling errors in parsing. Covers various LangChain output parsers like JSON, XML, CSV, Pandas DataFrame, along with capabilities like output fixing, retrying, and using user-defined formats.
|
||||
|
||||
[How to: parse text from message objects](https://python.langchain.com/docs/how_to/output_parser_string/): needing to parse text from message objects, needing to extract text from chat model responses, or working with structured output formats. This page explains how to use the StrOutputParser to extract text from message objects, regardless of the underlying content format, such as text, multimodal data, or structured output.
|
||||
[How to: use output parsers to parse an LLM response into structured format](https://python.langchain.com/docs/how_to/output_parser_structured/): [needing to parse LLM output into structured data, needing to stream partially parsed structured outputs, using LCEL with output parsers] 'Explains how to use output parsers like PydanticOutputParser to parse LLM text responses into structured formats like Python objects, and how to integrate them with prompts, models, and LCEL streaming.'
|
||||
[How to: parse JSON output](https://python.langchain.com/docs/how_to/output_parser_json/): LLM should read this page when: 1) Prompting a language model to return JSON output 2) Parsing JSON output from a language model 3) Streaming partial JSON objects from a language model 'This page explains how to use the JsonOutputParser to specify a desired JSON schema, prompt a language model to generate output conforming to that schema, and parse the model's response as JSON. It covers using JsonOutputParser with and without Pydantic, streaming partial JSON objects, and provides code examples.'
|
||||
[How to: parse XML output](https://python.langchain.com/docs/how_to/output_parser_xml/): needing to parse XML output from a model, when outputting prompts with XML formatting instructions for models, when streaming partial XML results This page shows how to use the XMLOutputParser to parse model output in XML format, including adding XML formatting instructions to prompts and streaming partial XML output
|
||||
[How to: parse YAML output](https://python.langchain.com/docs/how_to/output_parser_yaml/): LLM should read this page when: 1) Needing to generate YAML output conforming to a specific schema 2) Incorporating YAML output into a larger prompt/chain 3) Parsing YAML output returned by an LLM 'This page explains how to use the YamlOutputParser to parse YAML output from language models, allowing the output to conform to a predefined schema. It covers setting up the parser, constructing prompts with formatting instructions, and chaining the parser with a model.'
|
||||
[How to: retry when output parsing errors occur](https://python.langchain.com/docs/how_to/output_parser_retry/): [attempting to parse and handle partial or error LLM outputs, troubleshooting output parsing failures, implementing retry logic for parsing] [Explains how to use the RetryOutputParser to handle parsing errors by reprompting the LLM, provides examples for using it with OpenAI models and chaining it with other runnables.]
|
||||
[How to: try to fix errors in output parsing](https://python.langchain.com/docs/how_to/output_parser_fixing/): needing to handle improperly formatted outputs, attempting to fix formatting issues using an LLM, or parsing outputs that do not conform to a predefined schema. Explains how to use the OutputFixingParser, which wraps another parser and attempts to fix formatting errors by consulting an LLM when the original parser fails.
|
||||
[How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom/): creating a custom output parser, implementing a custom parser by inheriting from base classes, or parsing raw model outputs. Covers how to create custom output parsers using runnable lambdas/generators (recommended) or by inheriting from base parser classes like BaseOutputParser and BaseGenerationOutputParser. Includes examples for simple and more complex parsing scenarios.
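A short, offline-runnable sketch of the parser pattern covered above, using `PydanticOutputParser` to turn model text into a typed object:

```python
# Sketch: parse model output into a Pydantic object (parsing step shown offline).
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field


class Person(BaseModel):
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age in years")


parser = PydanticOutputParser(pydantic_object=Person)
print(parser.get_format_instructions())  # goes into the prompt sent to the model

# Pretend this JSON came back from the model:
print(parser.parse('{"name": "Ada", "age": 36}'))
```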
|
||||
|
||||
#### Document loaders
|
||||
|
||||
[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders/): needing to load data from various sources like files, webpages, or databases, or when handling large datasets with lazy loading. Document loaders help load data from different sources into a standardized Document object format, with options for lazy loading of large datasets.
|
||||
|
||||
[How to: load PDF files](https://python.langchain.com/docs/how_to/document_loader_pdf/)
|
||||
[How to: load web pages](https://python.langchain.com/docs/how_to/document_loader_web/): needing to load and process web pages for question answering or other applications, or needing guidance on using web page content with LangChain. The page covers how to load web pages into LangChain's Document format, including simple text extraction and advanced parsing of page structure. It demonstrates tools like WebBaseLoader and UnstructuredLoader, and shows how to perform operations like vector search over loaded web content.
|
||||
[How to: load CSV data](https://python.langchain.com/docs/how_to/document_loader_csv/): loading CSV files into a sequence of documents, customizing CSV parsing and loading, specifying a column to identify the document source This page explains how to load CSV files into a sequence of Document objects using LangChain's CSVLoader, including customizing the parsing, specifying a source column, and loading from a string.
|
||||
[How to: load data from a directory](https://python.langchain.com/docs/how_to/document_loader_directory/): loading documents from a file system, handling various file encodings, or using custom document loaders. Shows how to load files from directories using the DirectoryLoader, handle encoding errors, use multithreading, and customize the loader class.
|
||||
[How to: load HTML data](https://python.langchain.com/docs/how_to/document_loader_html/): loading HTML documents, parsing HTML files with specialized tools, or extracting text from HTML. This page covers how to load HTML documents into LangChain Document objects using Unstructured and BeautifulSoup4, with code examples and API references provided.
|
||||
[How to: load JSON data](https://python.langchain.com/docs/how_to/document_loader_json/): loading JSON or JSON Lines data into LangChain Documents, or extracting metadata from JSON data. This page explains how to use the JSONLoader to convert JSON and JSONL data into LangChain Documents, including how to extract specific fields into the content and metadata, and provides examples for common JSON structures.
|
||||
[How to: load Markdown data](https://python.langchain.com/docs/how_to/document_loader_markdown/): needing to load Markdown files, needing to retain Markdown elements, needing to parse Markdown into components This page covers how to load Markdown files into LangChain documents, including retaining elements like titles and lists, and parsing Markdown into components.
|
||||
[How to: load Microsoft Office data](https://python.langchain.com/docs/how_to/document_loader_office_file/): loading Microsoft Office files (DOCX, XLSX, PPTX) into LangChain, when working with Azure AI Document Intelligence. It covers how to use the AzureAIDocumentIntelligenceLoader to load Office documents into LangChain Documents for further processing.
|
||||
[How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom/): creating a custom document loader, working with files, or using the GenericLoader abstraction. This page explains how to create a custom document loader, work with files using BaseBlobParser and Blob, and use the GenericLoader to combine a BlobLoader with a BaseBlobParser.
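A small sketch of the CSV loader mentioned above, writing a temporary file so the example runs on its own (requires the `langchain-community` package):

```python
# Sketch: load rows of a CSV file as Documents with CSVLoader.
from pathlib import Path

from langchain_community.document_loaders import CSVLoader

Path("teams.csv").write_text("team,wins\nAlpha,10\nBeta,7\n", encoding="utf-8")

loader = CSVLoader(file_path="teams.csv", source_column="team")
docs = loader.load()
print(docs[0].page_content)  # "team: Alpha\nwins: 10"
print(docs[0].metadata)      # {'source': 'Alpha', 'row': 0}
```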
|
||||
|
||||
#### Text splitters
|
||||
|
||||
[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters/): working with long documents, handling limited model input sizes, or optimizing retrieval systems This page discusses different strategies for splitting large texts into smaller chunks, including length-based, text structure-based, document structure-based, and semantic meaning-based approaches.
|
||||
|
||||
[How to: recursively split text](https://python.langchain.com/docs/how_to/recursive_text_splitter/): splitting long text into smaller chunks, processing text from languages without word boundaries like Chinese or Japanese, parsing documents for downstream tasks. Covers how to recursively split text by list of characters like newlines and spaces, and options to customize characters for different languages. Discusses chunk size, overlap, and creating LangChain Document objects.
|
||||
[How to: split HTML](https://python.langchain.com/docs/how_to/split_html/): needing to split HTML content into chunks, preserving semantic structure for better context during processing Explains different techniques to split HTML pages like HTMLHeaderTextSplitter, HTMLSectionSplitter, HTMLSemanticPreservingSplitter; covers preserving tables, lists, custom handlers
|
||||
[How to: split by character](https://python.langchain.com/docs/how_to/character_text_splitter/): needing to split text by individual characters, needing to control chunk size by character count, needing to handle text with differing chunk sizes. Explains how to split text into chunks by character count, using the CharacterTextSplitter. Covers setting chunk size, overlap, and passing metadata.
|
||||
[How to: split code](https://python.langchain.com/docs/how_to/code_splitter/): needing to split code into logical chunks, working with code from specific programming languages, or creating language-specific text splitters. Provides examples of using the RecursiveCharacterTextSplitter to split code from various programming languages like Python, JavaScript, Markdown, and others into document chunks based on language-specific separators.
|
||||
[How to: split Markdown by headers](https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter/): splitting markdown files into chunks, handling headers and metadata in markdown files, constraining chunk sizes in markdown files. This page covers how to split markdown files by headers into chunks, handle metadata associated with headers, and constrain chunk sizes using other text splitters like RecursiveCharacterTextSplitter.
|
||||
[How to: recursively split JSON](https://python.langchain.com/docs/how_to/recursive_json_splitter/): splitting JSON data into smaller chunks, managing chunk sizes from list content within JSON data. Explains how to split JSON data into smaller chunks while keeping nested objects intact, control chunk sizes, and handle JSON lists by converting them to dictionaries before splitting.
|
||||
[How to: split text into semantic chunks](https://python.langchain.com/docs/how_to/semantic-chunker/): building an application that needs to split long text into smaller chunks based on semantic meaning, when working with large documents that need to be broken down into semantically coherent sections, or when needing to control the granularity of text splitting. This page explains how to use the SemanticChunker from LangChain to split text into semantically coherent chunks by leveraging embedding models, with options to control the splitting behavior based on percentile, standard deviation, interquartile range, or gradient of embedding distance.
[How to: split by tokens](https://python.langchain.com/docs/how_to/split_by_token/): LLM should read this page when: 1) Splitting long text into chunks while counting tokens 2) Handling non-English languages for text splitting 3) Comparing different tokenizers for text splitting. The page covers how to split text into chunks based on token count using different tokenizers like tiktoken, spaCy, SentenceTransformers, NLTK, KoNLPY (for Korean), and Hugging Face tokenizers. It explains the approaches, usage, and API references for each tokenizer.
#### Embedding models
[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models/): LLM should read this page when: 1) Working with text embeddings for search/retrieval 2) Comparing text similarity using embedding vectors 3) Selecting or integrating text embedding models It covers key concepts of embedding models: converting text to numerical vectors, measuring similarity between vectors, embedding models (historical context, interface, integrations), and common similarity metrics (cosine, Euclidean, dot product).
[supported integrations](https://python.langchain.com/docs/integrations/text_embedding/): looking for integrations with embedding models, wanting to compare embedding providers, needing guidance on selecting an embedding model This page documents integrations with various model providers that allow using embeddings in LangChain, covering OpenAI, Azure, Google, AWS, HuggingFace, and other embedding services.
[How to: embed text data](https://python.langchain.com/docs/how_to/embed_text/): it needs to embed text into vectors, when it needs to use text embeddings for tasks like semantic search, and when it needs to understand the interface for text embedding models. This page explains how to use LangChain's Embeddings class to interface with various text embedding model providers, embed documents and queries, and work with the resulting vector representations of text.
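A minimal sketch of the embedding interface, assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set; any other Embeddings integration exposes the same two methods:

```python
# Sketch: embed a batch of documents and a single query.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
doc_vectors = embeddings.embed_documents(["LangChain docs", "Vector stores"])
query_vector = embeddings.embed_query("What is a vector store?")
print(len(doc_vectors), len(query_vector))  # number of document vectors, query vector length
```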
[How to: cache embedding results](https://python.langchain.com/docs/how_to/caching_embeddings/): caching document embeddings to improve performance, caching query embeddings to improve performance, or choosing a data store for caching embeddings. This page covers how to use the CacheBackedEmbeddings class to cache document and query embeddings in a ByteStore, demonstrating its usage with a local file store and an in-memory store. It also explains how to specify the cache namespace to avoid collisions.
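A hedged sketch of embedding caching with a local file store; the cache directory is arbitrary, and the underlying embeddings model is assumed to be the one from the previous sketch:

```python
# Sketch: cache document embeddings in a file-backed byte store.
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings

underlying = OpenAIEmbeddings(model="text-embedding-3-small")
store = LocalFileStore("./embedding_cache/")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying, store, namespace=underlying.model  # namespace avoids cache collisions
)
# The second call for identical text is served from the cache.
vectors = cached_embeddings.embed_documents(["hello world", "hello world"])
```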
[How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings/): needing to use a custom text embedding model, integrating a new text embedding provider, or contributing a new text embedding integration. The page covers implementing custom text embedding models for LangChain by following the Embeddings interface, providing examples, testing, and contributing guidelines.
#### Vector stores
[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): LLM should read this page when: 1) Building applications that need to index and retrieve information based on semantic similarity 2) Integrating vector databases into their application 3) Exploring advanced vector search and retrieval techniques Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering.
[supported integrations](https://python.langchain.com/docs/integrations/vectorstores/): integrating vector stores into applications, deciding which vector store to use, or understanding the capabilities of different vector stores. This page provides an overview of vector stores, which are used to store embedded data and perform similarity search. It lists the different vector stores integrated with LangChain, along with their key features and capabilities.
[How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstores/): building applications that require searching over large collections of text, when indexing and retrieving relevant information based on similarity between embeddings, and when working with vector databases and embeddings. The page covers how to create and query vector stores, which are used to store embedded vectors of text and search for similar embeddings. It explains how to initialize different vector store options like Chroma, FAISS, and LanceDB, and how to perform similarity searches on them. It also touches on asynchronous operations with vector stores.
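A minimal sketch using the in-memory vector store; the documents and embedding model are illustrative, and Chroma, FAISS, or LanceDB could be substituted:

```python
# Sketch: index two documents and run a similarity search.
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

vector_store = InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-small"))
vector_store.add_documents([
    Document(page_content="LangChain integrates many vector stores."),
    Document(page_content="Embeddings map text to numeric vectors."),
])
results = vector_store.similarity_search("How is text represented numerically?", k=1)
print(results[0].page_content)
```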
#### Retrievers
[Retrievers](https://python.langchain.com/docs/concepts/retrievers/): building a retrieval system, integrating different retrieval sources, or linking retrieved information to source documents. This page outlines the retriever interface in LangChain, common types of retrievers such as vector stores and search APIs, and advanced retrieval patterns like ensembling and retaining source document information.
[How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstore_retriever/): using vector stores for retrieval, implementing maximum marginal relevance retrieval, or specifying additional search parameters. This page explains how to create a retriever from a vector store, how to use maximum marginal relevance retrieval, and how to pass parameters like similarity score thresholds and top-k results.
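A short sketch of turning a vector store into a retriever; `vector_store` is assumed to be the store built in the earlier sketch, and the search type and parameters are illustrative:

```python
# Sketch: expose a vector store as a retriever using maximal marginal relevance.
retriever = vector_store.as_retriever(
    search_type="mmr",           # or "similarity" / "similarity_score_threshold"
    search_kwargs={"k": 4},
)
docs = retriever.invoke("How is text represented numerically?")
```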
[How to: generate multiple queries to retrieve data for](https://python.langchain.com/docs/how_to/MultiQueryRetriever/): improving retrieval results for search queries, retrieving documents from a vector database, or using an LLM to generate multiple queries for a given input. Explains how to use MultiQueryRetriever to automatically generate multiple queries from an input question using an LLM, retrieve documents for each query, and take the unique union of results to improve retrieval performance.
[How to: use contextual compression to compress the data retrieved](https://python.langchain.com/docs/how_to/contextual_compression/): needing to retrieve relevant information from a large corpus of documents, filter out irrelevant content from retrieved documents, or compress and shorten documents to focus on query-relevant content. This page discusses contextual compression, a technique that allows retrieving only relevant portions of documents given a query, using various methods like LLM-based extractors/filters, embedding similarity filters, or combinations thereof via pipelines.
[How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever/): learning how to create a custom retriever, when implementing custom retrieval logic, when adding retrieval capabilities to an application. Explains how to implement a custom Retriever class by extending BaseRetriever, including providing examples and guidelines for contributing custom retrievers.
[How to: add similarity scores to retriever results](https://python.langchain.com/docs/how_to/add_scores_retriever/): needing to incorporate similarity/relevance scores from retrievers, using vector or multi-vector retrievers, or propagating scores through custom retriever subclasses. Shows how to add similarity scores from retrievers like Vector Store Retrievers, SelfQueryRetriever, and MultiVectorRetriever to the metadata of retrieved documents.
[How to: combine the results from multiple retrievers](https://python.langchain.com/docs/how_to/ensemble_retriever/): combining results from multiple retriever algorithms, leveraging different retrieval strengths, or using a hybrid search approach. The page explains how to use the EnsembleRetriever to combine results from sparse and dense retrievers, outlines basic usage, and demonstrates runtime configuration of individual retrievers.
[How to: reorder retrieved results to mitigate the "lost in the middle" effect](https://python.langchain.com/docs/how_to/long_context_reorder/): looking to improve performance of RAG applications, mitigating the "lost in the middle" effect, reordering retrieved results for longer contexts. Explains how to reorder retrieved documents to position the most relevant at the beginning and end, with less relevant in the middle, helping surface important information for language models.
[How to: generate multiple embeddings per document](https://python.langchain.com/docs/how_to/multi_vector/): needing to retrieve documents using multiple vector embeddings per document, when working with long documents that need to be split into chunks, when using document summaries for retrieval. This page covers how to index documents using 1) document chunks, 2) summaries generated with an LLM, and 3) hypothetical questions generated with an LLM. It demonstrates the usage of the MultiVectorRetriever to retrieve parent documents based on vector embeddings of chunks/summaries/questions.
[How to: retrieve the whole document for a chunk](https://python.langchain.com/docs/how_to/parent_document_retriever/): 1) wanting to retrieve larger documents instead of just smaller chunks for context, 2) trying to balance keeping context while splitting long documents. The page explains how to use the ParentDocumentRetriever, which first splits documents into small chunks for indexing but then retrieves the larger parent documents those chunks came from during retrieval. It shows code examples for retrieving full documents as well as larger chunks rather than full documents.
[How to: generate metadata filters](https://python.langchain.com/docs/how_to/self_query/): needing to perform retrieval on documents based on semantic similarity to the query text and metadata filters, integrating the retrieval into a question-answering pipeline. Covers creating a Self Query Retriever which can perform semantic text retrieval and structured metadata filtering in one step, using an underlying vector store and a query constructor LLM chain to parse natural language queries into structured representations.
[How to: create a time-weighted retriever](https://python.langchain.com/docs/how_to/time_weighted_vectorstore/): it needs to retrieve documents from a vector store considering both semantic similarity and time decay, it needs to simulate time for testing purposes, or it needs to adjust the balance between semantic similarity and recency in retrieving documents. This page explains how to use the TimeWeightedVectorStoreRetriever, which combines semantic similarity scores from a vector store with a time decay factor that reduces the relevance of older documents over time, and provides examples of using different decay rates and mocking time for testing.
[How to: use hybrid vector and keyword retrieval](https://python.langchain.com/docs/how_to/hybrid/): LLM should read this page when: 1) It needs to perform hybrid search combining vector and other search techniques 2) It uses a vectorstore that supports hybrid search capabilities. Explains how to configure and invoke LangChain chains to leverage hybrid search features of vectorstores like Astra DB, ElasticSearch, etc.
#### Indexing
Indexing is the process of keeping your vectorstore in-sync with the underlying data source.
[How to: reindex data to keep your vectorstore in-sync with the underlying data source](https://python.langchain.com/docs/how_to/indexing/): needing to index documents into a vector store, handling content deduplication and document mutations over time, or cleaning up old/deleted documents from the store. Covers the LangChain indexing API workflow, including deletion modes, using document loaders, and setting source metadata for documents to handle mutations and deletions properly.
#### Tools
[Tools](https://python.langchain.com/docs/concepts/tools/): needing an overview of tools in LangChain, wanting to create custom tools, or learning how to pass runtime values to tools. Tools are a way to encapsulate functions with schemas that can be passed to chat models supporting tool calling. The page covers the tool interface, creating tools using the @tool decorator, configuring tool schemas, tool artifacts, special type annotations like InjectedToolArg, and toolkits.
[How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools/): creating custom tools for agents, converting functions or runnables to tools, or subclassing BaseTool. This page covers creating tools from functions using the @tool decorator or StructuredTool class, creating tools from Runnables, subclassing BaseTool for custom tools, creating async tools, handling tool errors, and returning artifacts from tool execution.
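A minimal sketch of a custom tool defined with the `@tool` decorator; the function itself is made up, and its docstring and type hints become the schema the model sees:

```python
# Sketch: turn a plain function into a LangChain tool.
from langchain_core.tools import tool

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

print(multiply.name, multiply.args)        # tool name and argument schema
print(multiply.invoke({"a": 6, "b": 7}))   # 42
```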
[How to: use built-in tools and toolkits](https://python.langchain.com/docs/how_to/tools_builtin/): needing to use built-in LangChain tools or toolkits, needing to customize built-in LangChain tools. This page covers how to use LangChain's built-in tools and toolkits, including customizing tool names, descriptions, and argument schemas. It also explains how to use LangChain toolkits, which are collections of tools for specific tasks.
[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects.
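A hedged sketch of tool calling with a chat model; it reuses the hypothetical `multiply` tool from the previous sketch and assumes `langchain-openai` is installed, though any tool-calling chat model works the same way:

```python
# Sketch: bind a tool to a chat model and inspect the tool calls it produces.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
llm_with_tools = llm.bind_tools([multiply])
ai_msg = llm_with_tools.invoke("What is 6 times 7?")
for call in ai_msg.tool_calls:           # list of {"name": ..., "args": ..., "id": ...}
    print(call["name"], call["args"])
```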
[How to: pass tool outputs to chat models](https://python.langchain.com/docs/how_to/tool_results_pass_to_model/): 1) integrating tools with chat models, 2) implementing tool calling functionality, 3) passing tool outputs back to chat models. Demonstrates how to pass tool function outputs back to chat models as tool messages, allowing the model to incorporate tool results in generating a final response.
[How to: pass run time values to tools](https://python.langchain.com/docs/how_to/tool_runtime/): it needs to pass runtime values to tools, when it needs to prevent an LLM from generating certain tool arguments, and when it needs to inject arguments directly at runtime. This page explains how to use the InjectedToolArg annotation to mark certain parameters of a Tool as being injected at runtime, preventing the LLM from generating those arguments. It also shows how to inject the arguments at runtime and create a tool-executing chain.
[How to: add a human-in-the-loop for tools](https://python.langchain.com/docs/how_to/tools_human/): adding human approval to tool calling, allowing human intervention in a workflow, or setting up fail-safes for sensitive operations. This page demonstrates how to add a human-in-the-loop step to approve or reject tool calls made by an LLM in a tool-calling chain using LangChain.
[How to: handle tool errors](https://python.langchain.com/docs/how_to/tools_error/): needing to handle errors that occur when tools are called by an LLM, when building fault tolerance into tool-calling chains, or when enabling self-correction for tool calling errors. The page covers strategies like try/except for tool calls, fallbacks to different models, retrying with exceptions passed to the LLM, and creating custom tool exceptions.
[How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice/): needing to force an LLM to call a specific tool, or needing to force an LLM to call at least one tool. This page shows how to use the tool_choice parameter to force an LLM to call a specific tool or to call at least one tool from a set of available tools.
[How to: disable parallel tool calling](https://python.langchain.com/docs/how_to/tool_calling_parallel/): considering disabling parallel tool calling, when looking for examples on parallel vs. single tool calls, when trying to control the number of tool calls made. Explains how to disable parallel tool calling in LangChain so that only one tool is called at a time, providing code examples.
[How to: access the `RunnableConfig` from a tool](https://python.langchain.com/docs/how_to/tool_configure/): accessing or configuring runtime behavior of sub-runnables from a custom tool, or streaming events from child runnables within a tool. This page explains how to access the RunnableConfig from within a custom tool to configure sub-invocations and stream events from those sub-invocations.
[How to: stream events from a tool](https://python.langchain.com/docs/how_to/tool_stream_events/): it needs to stream events from a tool, when it needs to configure tools to access internal runnables, or when it needs to propagate configurations to child runnables in async environments. Guide on how to stream events from tools that call chat models, retrievers, or other runnables, by accessing internal events and propagating configurations, with examples and explanations for compatibility across Python versions.
[How to: return artifacts from a tool](https://python.langchain.com/docs/how_to/tool_artifacts/): returning structured data from a tool, passing artifacts to downstream components, handling custom data types from tools This page explains how tools can return artifacts separate from model input, allowing custom objects, dataframes, or images to be passed to downstream components while limiting model exposure.
[How to: convert Runnables to tools](https://python.langchain.com/docs/how_to/convert_runnable_to_tool/): needing to convert a Python function or Runnable into a LangChain tool, when building an agent that calls external tools, or when integrating a custom tool into a chat model. Demonstrates how to use the Runnable.as_tool() method to convert a Runnable to a tool with a name, description, and arguments schema. Includes examples of agents calling tools created from Runnables.
[How to: add ad-hoc tool calling capability to models](https://python.langchain.com/docs/how_to/tools_prompting/): LLM should read this page when: 1) Adding ad-hoc tool calling capability to chat models/LLMs, 2) Using models not fine-tuned for tool calling, 3) Invoking custom tools from LLMs. This guide demonstrates how to create prompts that instruct LLMs to request tool invocations, parse the LLM output to extract tool and arguments, invoke the requested tool, and return the tool output.
[How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets/): needing to pass sensitive data to a runnable, ensuring secrets remain hidden from tracing, or integrating secret values with runnables. Explains how to pass runtime secrets to runnables using RunnableConfig, allowing certain keys to be hidden from tracing while still being accessible during invocation.
#### Multimodal
[How to: pass multimodal data directly to models](https://python.langchain.com/docs/how_to/multimodal_inputs/): needing to pass multimodal data (images, videos, etc.) to models, when working with models that support multimodal input and tool calling capabilities, and when looking to understand how to encode and pass different types of multimodal data. This page demonstrates how to pass multimodal input like images directly to LLMs and chat models, covering encoding techniques, passing single/multiple images, and invoking models with image/multimodal content. It also shows how to use multimodal models for tool calling.
[How to: use multimodal prompts](https://python.langchain.com/docs/how_to/multimodal_prompts/): wanting to pass multimodal data like images to an LLM, when wanting to send multiple pieces of multimodal data to an LLM, when wanting instructions on how to format multimodal prompts. This shows how to use prompt templates to format multimodal inputs like images to models that support it, including sending multiple images, and comparing images.
#### Agents
:::note
[LangGraph](https://langchain-ai.github.io/langgraph/): learning about LangGraph, considering using LangGraph for an AI application, or deciding between LangGraph and alternatives. Overview of LangGraph as an open-source framework for building AI agents, its key features like reliability and customizability, its ecosystem integration with other LangChain products, and additional learning resources.
:::
[How to: use legacy LangChain Agents (AgentExecutor)](https://python.langchain.com/docs/how_to/agent_executor/): building agents with specific tools, when working with chat history, when using language models for tool calling. This page explains how to build agents with AgentExecutor that can call tools like search engines and retrievers, how to add chat history to agents, and how to use language models to determine which tools to call.
[How to: migrate from legacy LangChain agents to LangGraph](https://python.langchain.com/docs/how_to/migrate_agent/): LLM should read this page when: 1) Migrating from legacy LangChain agents to LangGraph 2) Comparing the functionality of LangChain and LangGraph agents. This page provides a detailed guide on migrating from legacy LangChain agents to LangGraph agents, covering topics such as basic usage, prompt templates, memory handling, iterating through steps, dealing with intermediate steps, setting iteration and execution time limits, early stopping methods, and trimming intermediate steps.
#### Callbacks
[Callbacks](https://python.langchain.com/docs/concepts/callbacks/): needing to log, monitor, or stream events in an LLM application. This page covers LangChain's callback system, which allows hooking into various stages of an LLM application for logging, monitoring, streaming, and other purposes. It explains the different callback events, callback handlers, and how to pass callbacks.
[How to: pass in callbacks at runtime](https://python.langchain.com/docs/how_to/callbacks_runtime/): needing to pass callback handlers at runtime to capture events, or needing to attach handlers to nested objects. This page explains how to pass callback handlers at runtime when invoking a runnable, which allows capturing events from all nested objects without manually attaching handlers.
[How to: attach callbacks to a module](https://python.langchain.com/docs/how_to/callbacks_attach/): attaching callbacks to a runnable, reusing callbacks across multiple executions, or composing a chain of runnables. This page explains how to attach callbacks to a runnable using the .with_config() method, allowing callbacks to be reused across multiple executions and propagated to child components in a chain of runnables.
[How to: pass callbacks into a module constructor](https://python.langchain.com/docs/how_to/callbacks_constructor/): LLM should read this page when: 1) Implementing callbacks in LangChain, 2) Understanding the scope of constructor callbacks, 3) Deciding whether to use constructor or runtime callbacks. This page explains how to pass callbacks into the constructor of LangChain objects, and that constructor callbacks are scoped only to the object they are defined on, not inherited by child objects.
[How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks/): creating custom behavior for LangChain components, customizing callback events, or implementing event handlers. This page explains how to create custom callback handlers by implementing callback methods and attaching the handler to LangChain components.
[How to: use callbacks in async environments](https://python.langchain.com/docs/how_to/callbacks_async/): needing to use callbacks in async environments, handling sync callbacks in async methods, or using AsyncCallbackHandler. Covers using callbacks with async APIs, avoiding blocking with AsyncCallbackHandler, propagating callbacks in async runnables, and an example of sync and async callback handlers.
[How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events/): dispatching custom callback events, handling async or sync custom callback events, or consuming custom events via the astream events API. This page covers how to dispatch custom callback events from within a Runnable, consume these events via async/sync callback handlers, and access custom events through the astream events API.
#### Custom
All LangChain components can easily be extended to support your own versions.
[How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model/): creating a custom chat model class, integrating a new language model as a chat model, or implementing streaming for a chat model. This page explains how to create a custom chat model class by inheriting from BaseChatModel, and implementing methods like _generate and _stream. It covers handling inputs, messages, streaming, identifying parameters, and contributing custom chat models.
[How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm/): creating a custom LLM class, wrapping their own LLM provider, integrating with a new language model not yet supported by LangChain. This page explains how to create a custom LLM class by implementing the required _call and _llm_type methods, as well as optional methods like _identifying_params, _acall, _stream, and _astream. It provides an example implementation, demonstrates testing and integration with LangChain APIs, and offers guidance for contributing custom LLM integrations.
[How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings/): needing to use a custom text embedding model, integrating a new text embedding provider, or contributing a new text embedding integration. The page covers implementing custom text embedding models for LangChain by following the Embeddings interface, providing examples, testing, and contributing guidelines.
[How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever/): learning how to create a custom retriever, when implementing custom retrieval logic, when adding retrieval capabilities to an application. Explains how to implement a custom Retriever class by extending BaseRetriever, including providing examples and guidelines for contributing custom retrievers.
[How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom/): creating a custom document loader, working with files, or using the GenericLoader abstraction. This page explains how to create a custom document loader, work with files using BaseBlobParser and Blob, and use the GenericLoader to combine a BlobLoader with a BaseBlobParser.
[How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom/): creating a custom output parser, implementing a custom parser by inheriting from base classes, or parsing raw model outputs. Covers how to create custom output parsers using runnable lambdas/generators (recommended) or by inheriting from base parser classes like BaseOutputParser and BaseGenerationOutputParser. Includes examples for simple and more complex parsing scenarios.
[How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks/): creating custom behavior for LangChain components, customizing callback events, or implementing event handlers. This page explains how to create custom callback handlers by implementing callback methods and attaching the handler to LangChain components.
[How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools/): creating custom tools for agents, converting functions or runnables to tools, or subclassing BaseTool. This page covers creating tools from functions using the @tool decorator or StructuredTool class, creating tools from Runnables, subclassing BaseTool for custom tools, creating async tools, handling tool errors, and returning artifacts from tool execution.
[How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events/): dispatching custom callback events, handling async or sync custom callback events, or consuming custom events via the astream events API. This page covers how to dispatch custom callback events from within a Runnable, consume these events via async/sync callback handlers, and access custom events through the astream events API.
#### Serialization
[How to: save and load LangChain objects](https://python.langchain.com/docs/how_to/serialization/): needing to save and reload LangChain objects, handle API keys securely when serializing/deserializing objects, and maintain compatibility when deserializing objects across different versions of LangChain. This page discusses how to save and load serializable LangChain objects like chains, messages, and documents using the dump/load functions, which separate API keys and ensure cross-version compatibility. Examples are provided for serializing/deserializing to JSON strings, Python dicts, and disk files.
## Use cases
These guides cover use-case specific details.
### Q&A with RAG
Retrieval Augmented Generation (RAG) is a way to connect LLMs to external sources of data.
[this guide](https://python.langchain.com/docs/tutorials/rag/): building a retrieval-augmented question-answering system, when needing to index and search through unstructured data sources, when learning about key concepts like document loaders, text splitters, vector stores, and retrievers. This tutorial covers how to build a Q&A application over textual data by loading documents, splitting them into chunks, embedding and storing the chunks in a vector store, retrieving relevant chunks for a user query, and generating an answer using a language model with the retrieved context.
[How to: add chat history](https://python.langchain.com/docs/how_to/qa_chat_history_how_to/): building a conversational question-answering application, incorporating chat history and retrieval from external knowledge sources, and deciding between using chains or agents for the application logic. Discusses building chat applications with LangChain by using chains for predictable retrieval steps or agents for more dynamic reasoning. Covers setting up components like embeddings and vector stores, constructing chains with tool calls for retrieval, and assembling LangGraph agents with a ReAct executor. Provides examples for testing the applications.
[How to: stream](https://python.langchain.com/docs/how_to/qa_streaming/): LLM should read this page when: 1) Building a RAG (Retrieval Augmented Generation) application that requires streaming final outputs or intermediate steps 2) Integrating streaming capabilities into an existing LLM-based application 'The page provides guidance on how to stream final outputs and intermediate steps from a RAG (Retrieval Augmented Generation) application built with LangChain and LangGraph. It covers setting up the necessary components, constructing the RAG application, and utilizing different streaming modes to stream tokens from the final output or individual state updates from each step.'
[How to: return sources](https://python.langchain.com/docs/how_to/qa_sources/): LLM should read this page when: 1) Building a question-answering (QA) application that needs to return the sources used to generate the answer. 2) Implementing a conversational QA system with retrieval-augmented generation (RAG). 3) Structuring model outputs to include sources or citations. 'This guide explains how to configure LangChain's QA and RAG workflows to retrieve and return the source documents or citations used to generate the final answer. It covers both basic RAG and conversational RAG architectures, and demonstrates techniques for structuring the model output to include source information.'
[How to: return citations](https://python.langchain.com/docs/how_to/qa_citations/): seeking to add citations to results from a Retrieval Augmented Generation (RAG) application, when wanting to justify an answer using source material, and when needing to provide evidence for generated outputs. The page covers various methods for getting a RAG application to cite sources used in generating answers, including tool-calling to return source IDs or text snippets, direct prompting to generate structured outputs with citations, retrieving and compressing context to minimize need for citations, and post-processing generated answers to annotate with citations.
[How to: do per-user retrieval](https://python.langchain.com/docs/how_to/qa_per_user/): needing to configure retrieval chains for per-user data access, wanting to limit document access for different users, or building retrieval applications with multi-tenant architectures. Explains how to configure retriever search kwargs to limit retrieved documents based on user, demonstrates code example using Pinecone namespace for multi-tenancy.
### Extraction
Extraction is when you use LLMs to extract structured information from unstructured text.
[this guide](https://python.langchain.com/docs/tutorials/extraction/): building information extraction applications, understanding how to use reference examples for improving extraction performance, or when needing to extract structured data from unstructured text. This tutorial covers building an information extraction chain using LangChain, defining schemas for extracting structured data, using reference examples to improve extraction quality, and extracting multiple entities from text.
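A hedged sketch of the core extraction pattern the tutorial walks through, using structured output; the schema, model name, and example sentence are illustrative:

```python
# Sketch: extract structured data from free text with a Pydantic schema.
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class Person(BaseModel):
    name: str = Field(description="The person's name")
    role: str = Field(description="The person's job or role")

llm = ChatOpenAI(model="gpt-4o-mini")
extractor = llm.with_structured_output(Person)
print(extractor.invoke("Ada Lovelace worked as a mathematician."))
```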
[How to: use reference examples](https://python.langchain.com/docs/how_to/extraction_examples/): wanting to use reference examples to improve extraction quality, wanting to structure example inputs and outputs for extraction, wanting to test an extraction model with and without examples. This page explains how to define reference examples in the format expected for the LangChain tool calling API, how to incorporate these examples into prompts, and how using examples can improve extraction performance compared to not using examples.
[How to: handle long text](https://python.langchain.com/docs/how_to/extraction_long_text/): working with large documents or PDFs that exceed the context window of the LLM, when needing to extract structured information from text. This page covers strategies for handling long text when doing information extraction, including a brute force approach of chunking the text and extracting from each chunk, and a retrieval-augmented generation (RAG) approach of indexing the chunks and only extracting from relevant ones. It also discusses common issues with these approaches.
[How to: do extraction without using function calling](https://python.langchain.com/docs/how_to/extraction_parse/): looking to extract structured data from text, when needing to parse model outputs into objects, or when wanting to avoid using tool calling methods for extraction tasks. This page explains how to use prompting instructions to get LLMs to generate outputs in a structured format like JSON, and then use output parsers to convert the model responses into Python objects.
### Chatbots
Chatbots involve using an LLM to have a conversation.
[this guide](https://python.langchain.com/docs/tutorials/chatbot/): building a chatbot application, incorporating conversational history, or using prompt templates. This page demonstrates how to build a chatbot with LangChain, including adding message persistence, prompt templates, conversation history management, and response streaming.
[How to: manage memory](https://python.langchain.com/docs/how_to/chatbots_memory/): LLM should read this page when: 1) Building a chatbot and wants to incorporate memory (chat history) 2) Looking to add context from previous messages to improve responses 3) Needs techniques to handle long conversations by summarizing or trimming history 'The page covers different techniques to add memory capabilities to chatbots, including passing previous messages directly, automatic history management using LangGraph persistence, trimming messages to reduce context, and generating summaries of conversations. Examples in Python are provided for each approach.'
[How to: do retrieval](https://python.langchain.com/docs/how_to/chatbots_retrieval/): building a retrieval-augmented chatbot, adding conversational context to retrieval queries, or streaming responses from a chatbot. This page covers setting up a retriever over a document corpus, creating document chains and retrieval chains, transforming queries for better retrieval, and streaming responses from the retrieval chain.
[How to: use tools](https://python.langchain.com/docs/how_to/chatbots_tools/): looking to integrate tools into chatbots, when using agents with tools, when incorporating web search into conversational agents. The page covers how to create a conversational agent using LangChain that can interact with APIs and web search tools, while maintaining chat history. It demonstrates setting up a ReAct agent with a Tavily search tool, invoking the agent, handling conversational responses with chat history, and adding memory.
[How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/): working with long chat histories, when concerned about token limits for chat models, when implementing token management strategies. This page explains how to use the trim_messages utility to reduce the size of a chat message history to fit within token limits, covering trimming by token count or message count, and allowing customization of trimming strategies.
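A minimal sketch of trimming chat history with `trim_messages`; the history is made up, and `token_counter=len` counts messages rather than tokens (pass a chat model there to count real tokens):

```python
# Sketch: keep only the most recent messages, preserving the system message.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages

history = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("Hi!"),
    AIMessage("Hello! How can I help?"),
    HumanMessage("Tell me a joke."),
]
trimmed = trim_messages(
    history,
    strategy="last",
    token_counter=len,    # counts messages; use a chat model for true token counts
    max_tokens=3,         # with len as the counter, this means "at most 3 messages"
    include_system=True,
)
```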
### Query analysis
Query Analysis is the task of using an LLM to generate a query to send to a retriever.
[this guide](https://python.langchain.com/docs/tutorials/rag/#query-analysis): LLM should read this page when: 1) Building a question-answering application over unstructured data 2) Learning about Retrieval Augmented Generation (RAG) architectures 3) Indexing data for use with LLMs 'This tutorial covers building a Retrieval Augmented Generation (RAG) application that can answer questions based on ingested data. It walks through loading data, chunking it, embedding and storing it in a vector store, retrieving relevant chunks for a given query, and generating an answer using an LLM. It also shows how to incorporate query analysis for improved retrieval.'
[How to: add examples to the prompt](https://python.langchain.com/docs/how_to/query_few_shot/): needing to guide an LLM to generate queries, when fine-tuning an LLM for query generation, when incorporating examples into few-shot prompts. This page covers how to add examples to prompts for query analysis in LangChain, including setting up the system, defining the query schema, generating queries, and tuning prompts by adding examples.
[How to: handle cases where no queries are generated](https://python.langchain.com/docs/how_to/query_no_queries/): querying for information, handling cases where no queries are generated, integrating query analysis with retrieval. Provides guidance on handling scenarios where query analysis techniques allow for no queries to be generated, including code examples for structuring the output, performing query analysis with an LLM, and integrating query analysis with a retriever in a chain.
[How to: handle multiple queries](https://python.langchain.com/docs/how_to/query_multiple_queries/): handling queries that generate multiple potential queries, combining retrieval results from multiple queries, and integrating query analysis with retrieval pipelines. Explains how to handle scenarios where a query analysis step produces multiple potential queries by running retrievals for each query and combining the results. Demonstrates this approach with code examples using LangChain components.
[How to: handle multiple retrievers](https://python.langchain.com/docs/how_to/query_multiple_retrievers/): needing to handle multiple retrievers for query analysis, when implementing a query analyzer that can select between different retrievers, when building a retrieval-augmented system that needs to choose between different data sources. This page explains how to handle scenarios where a query analysis step allows for selecting between multiple retrievers, showing an example implementation using LangChain's tools for structured output parsing, prompting, and chaining components together.
[How to: construct filters](https://python.langchain.com/docs/how_to/query_constructing_filters/): constructing filters for query analysis, translating filters to specific retriever formats, using LangChain's structured query objects. This page covers how to construct filters as Pydantic models and translate them into retriever-specific filters using LangChain's translators for Chroma and Elasticsearch.
[How to: deal with high cardinality categorical variables](https://python.langchain.com/docs/how_to/query_high_cardinality/): dealing with categorical data with high cardinality, handling potential misspellings of categorical values, and filtering based on categorical values. The page discusses techniques for handling high-cardinality categorical data in query analysis, such as adding all possible values to the prompt, using a vector store to find relevant values, and correcting user input to the closest valid categorical value.
### Q&A over SQL + CSV
You can use LLMs to do question answering over tabular data.
[this guide](https://python.langchain.com/docs/tutorials/sql_qa/): LLM should read this page when: 1. Building a question-answering system over a SQL database 2. Implementing agents or chains to interact with a SQL database 'This tutorial covers building question-answering systems over SQL databases using LangChain. It demonstrates creating chains and agents that can generate SQL queries from natural language, execute them against a database, and provide natural language responses. It covers techniques like schema exploration, query validation, and handling high-cardinality columns.'
[How to: use prompting to improve results](https://python.langchain.com/docs/how_to/sql_prompting/): querying SQL databases with a language model, when doing few-shot prompting for SQL queries, and when selecting relevant few-shot examples dynamically. This page covers how to improve SQL query generation prompts by incorporating database schema information, providing few-shot examples, and dynamically selecting the most relevant few-shot examples using semantic similarity.
[How to: do query validation](https://python.langchain.com/docs/how_to/sql_query_checking/): working on SQL query generation, handling invalid SQL queries, or incorporating human approval for SQL queries. This page covers strategies for validating SQL queries, such as appending a query validator step, prompt engineering, human-in-the-loop approval, and error handling.
[How to: deal with large databases](https://python.langchain.com/docs/how_to/sql_large_db/): dealing with large databases in SQL question-answering, identifying relevant table schemas to include in prompts, and handling high-cardinality columns with proper nouns or other unique values. The page discusses methods to identify relevant tables and table schemas to include in prompts when dealing with large databases. It also covers techniques to handle high-cardinality columns containing proper nouns or other unique values, such as creating a vector store of distinct values and querying it to include relevant spellings in prompts.
[How to: deal with CSV files](https://python.langchain.com/docs/how_to/sql_csv/): needing to build question-answering systems over CSV data, wanting to understand the tradeoffs between using SQL or Python libraries like Pandas, and requiring guidance on securely executing code from language models. This page covers two main approaches to question answering over CSV data: using SQL by loading CSVs into a database, or giving an LLM access to Python environments to interact with CSV data using libraries like Pandas. It discusses the security implications of each approach and provides code examples for implementing question-answering chains and agents with both methods.
### Q&A over graph databases
You can use an LLM to do question answering over graph databases.
[this guide](https://python.langchain.com/docs/tutorials/graph/): LLM should read this page when: 1) Building a question-answering system over a graph database 2) Implementing text-to-query generation for graph databases 3) Learning techniques for query validation and error handling 'This page covers building a question-answering application over a graph database using LangChain. It provides a basic implementation using the GraphQACypherChain, followed by an advanced implementation with LangGraph. The latter includes techniques like few-shot prompting, query validation, and error handling for generating accurate Cypher queries from natural language.'
[How to: add a semantic layer over the database](https://python.langchain.com/docs/how_to/graph_semantic/): needing to add a semantic layer over a graph database, needing to use tools representing Cypher templates with an LLM, or needing to build a LangGraph Agent to interact with a Neo4j database. This page covers how to create custom tools with Cypher templates for a Neo4j graph database, bind those tools to an LLM, and build a LangGraph Agent that can invoke the tools to retrieve information from the graph database.
[How to: construct knowledge graphs](https://python.langchain.com/docs/how_to/graph_constructing/): constructing knowledge graphs from unstructured text, storing information in a graph database, using LLM Graph Transformer to extract knowledge from text. This page explains how to set up a Neo4j graph database, use LLMGraphTransformer to extract structured knowledge graph data from text, filter extracted nodes/relationships, and store the knowledge graph in Neo4j.
### Summarization
LLMs can summarize and otherwise distill desired information from text, including large volumes of text.
[this guide](https://python.langchain.com/docs/tutorials/summarization/): needing to summarize long texts or documents, when building question-answering systems, when creating text analysis applications. This page covers summarizing texts using LangChain, including the "stuff" method (concatenating into single prompt), the "map-reduce" method (splitting into chunks for parallel summarization), and orchestrating these methods using LangGraph.
[How to: summarize text in a single LLM call](https://python.langchain.com/docs/how_to/summarize_stuff/): looking to summarize text, seeking a simple single-LLM summarization method, or exploring basic summarization chains in LangChain. This page outlines how to use LangChain's pre-built 'stuff' summarization chain, which stuffs text into a prompt for an LLM to summarize in a single call.
[How to: summarize text through parallelization](https://python.langchain.com/docs/how_to/summarize_map_reduce/): needing to summarize long text documents using parallelization, needing to optimize summarization for large volumes of text, and needing efficient summarization strategies. This page discusses using a map-reduce strategy to summarize text through parallelization, including breaking the text into subdocuments, generating summaries for each in parallel (map step), and then consolidating the summaries into a final summary (reduce step). It provides code examples using LangChain and LangGraph.
[How to: summarize text through iterative refinement](https://python.langchain.com/docs/how_to/summarize_refine/): LLM should read this page when: 1. Attempting to summarize long texts through iterative refinement 2. Learning about building applications with LangGraph 3. Seeking examples of streaming LLM outputs 'This guide demonstrates how to summarize text through iterative refinement using LangGraph. It involves splitting the text into documents, summarizing the first document, and then refining the summary based on subsequent documents until finished. The approach leverages LangGraph's streaming capabilities and modularity.'
## LangChain Expression Language (LCEL)
[LCEL](https://python.langchain.com/docs/concepts/lcel/): needing an overview of the LangChain Expression Language (LCEL), deciding whether to use LCEL or not, and understanding how to compose chains using LCEL primitives. Provides an overview of the LCEL, a declarative approach to building chains from existing Runnables, covering its benefits, composition primitives like RunnableSequence and RunnableParallel, the composition syntax, automatic type coercion, and guidance on when to use LCEL versus alternatives like LangGraph.
[**LCEL cheatsheet**](https://python.langchain.com/docs/how_to/lcel_cheatsheet/): needing a reference for interacting with Runnables in LangChain or building custom runnables and chains. This page provides a comprehensive cheatsheet with examples for key operations with Runnables such as invoking, batching, streaming, composing, configuring, and dynamically building runnables and chains.
[**Migration guide**](https://python.langchain.com/docs/versions/migrating_chains/): migrating older chains from LangChain v0.0, reimplementing legacy chains, or upgrading to use LCEL and LangGraph. This page provides guidance on migrating from deprecated v0.0 chain implementations to using LCEL and LangGraph, including specific guides for various legacy chains like LLMChain, ConversationChain, RetrievalQA, and others.
[How to: chain runnables](https://python.langchain.com/docs/how_to/sequence/): chaining multiple LangChain components together, composing prompt templates with models, or combining runnables in a sequence. This page explains how to chain runnables (LangChain components) together using the pipe operator '|' or the .pipe() method, including chaining prompt templates with models and parsers, and how input/output formats are coerced during chaining.
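A minimal sketch of chaining with the pipe operator, assuming `langchain-openai` is installed; the prompt and model name are illustrative:

```python
# Sketch: prompt -> chat model -> string output parser.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()
print(chain.invoke({"topic": "bears"}))
```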
[How to: stream runnables](https://python.langchain.com/docs/how_to/streaming/): wanting to learn how to stream LLM responses, stream intermediate steps, and configure streaming events. This page covers how to use the `stream` and `astream` methods to stream final outputs, how to use `astream_events` to stream both final outputs and intermediate steps, filtering events, propagating callbacks for streaming, and working with input streams.
[How to: invoke runnables in parallel](https://python.langchain.com/docs/how_to/parallel/): parallelizing steps in a chain, formatting data for chaining, or splitting inputs to run multiple runnables in parallel. Explains how to use RunnableParallel to execute runnables concurrently, format data between steps, and provides examples of parallelizing chains.
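A short sketch of running two chains concurrently over the same input with `RunnableParallel`; it reuses the prompt-model-parser pattern from the previous sketch:

```python
# Sketch: execute two sub-chains in parallel and collect both results.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
parser = StrOutputParser()
joke = ChatPromptTemplate.from_template("Joke about {topic}") | llm | parser
poem = ChatPromptTemplate.from_template("Two-line poem about {topic}") | llm | parser
combined = RunnableParallel(joke=joke, poem=poem)
print(combined.invoke({"topic": "otters"}))  # {"joke": "...", "poem": "..."}
```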
[How to: add default invocation args to runnables](https://python.langchain.com/docs/how_to/binding/): LLM should read this page when: 1) Wanting to invoke a Runnable with constant arguments not part of the preceding output or user input 2) Needing to bind provider-specific arguments like stop sequences or tools. This page explains how to use the Runnable.bind() method to set default invocation arguments for a Runnable within a RunnableSequence. It covers binding stop sequences to language models and attaching OpenAI tools.
[How to: turn any function into a runnable](https://python.langchain.com/docs/how_to/functions/): needing to use custom functions, needing to implement streaming, or needing to pass metadata to runnables. Covers how to use custom functions as Runnables, including constructors, decorators, coercion, passing metadata, and implementing streaming.
[How to: pass through inputs from one chain step to the next](https://python.langchain.com/docs/how_to/passthrough/): needing to pass data from one step to the next in a chain, when formatting inputs for prompts, when retrieving and preparing context for prompts. This page explains how to use RunnablePassthrough and RunnableParallel to pass data unchanged through chains, covering examples like formatting retrieval results and user inputs into prompts.
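A hedged sketch of the common retrieval-formatting pattern, where `RunnablePassthrough` forwards the user question unchanged while the retriever fills in context; `retriever` is assumed to be any retriever, such as the one from the earlier vector store sketch:

```python
# Sketch: assemble a prompt from retrieved context plus the original question.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

prompt = ChatPromptTemplate.from_template(
    "Answer using only this context:\n{context}\n\nQuestion: {question}"
)
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-4o-mini")
    | StrOutputParser()
)
print(rag_chain.invoke("How is text represented numerically?"))
```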
[How to: configure runnable behavior at runtime](https://python.langchain.com/docs/how_to/configure/): configuring chain internals at runtime, swapping models or prompts within a chain, or exploring different configurations of runnables. The page covers how to use .configurable_fields to configure parameters of a runnable at runtime, and .configurable_alternatives to swap out runnables with alternatives, including examples for chat models, prompts, and combinations thereof.
[How to: add message history (memory) to a chain](https://python.langchain.com/docs/how_to/message_history/): building a chatbot or multi-turn application, wanting to persist conversational state, wanting to manage message history This page explains how to add message history and persist conversational state using LangGraph, covering examples with chat models and prompt templates, and managing the message history.
[How to: route between sub-chains](https://python.langchain.com/docs/how_to/routing/): needing to conditionally route between sub-chains based on previous outputs, or needing to use semantic similarity to choose the most relevant prompt for a given query. The page covers how to route between sub-chains in LangChain, including using custom functions, RunnableBranch, and semantic similarity for prompt routing. It provides code examples for each method.
[How to: create a dynamic (self-constructing) chain](https://python.langchain.com/docs/how_to/dynamic_chain/): developing dynamic chains, implementing conditional routing, or returning runnables dynamically. The page explains how to create a dynamic chain that constructs parts of itself at runtime by having Runnable Lambdas return other Runnables.
[How to: inspect runnables](https://python.langchain.com/docs/how_to/inspect/): inspecting internals of an LCEL chain, debugging chain logic, or retrieving chain prompts. Provides methods to visualize chain graphs, print prompts used in chains, and inspect chain steps programmatically.
[How to: add fallbacks to a runnable](https://python.langchain.com/docs/how_to/fallbacks/): needing to add fallback options in case of errors, processing long inputs, or wanting to use a better model. This page explains how to configure fallback chains for LLM APIs in case of rate limiting or errors, for handling long input texts exceeding context windows, and for defaulting to better models when parsing fails.
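A minimal sketch of adding a fallback model, assuming `langchain-openai` and `langchain-anthropic` are installed; the model names are illustrative:

```python
# Sketch: fall back to a second chat model if the primary one errors out.
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI

primary = ChatOpenAI(model="gpt-4o-mini")
backup = ChatAnthropic(model="claude-3-5-haiku-latest")
llm_with_fallback = primary.with_fallbacks([backup])
print(llm_with_fallback.invoke("Say hello").content)
```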
[How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets/): needing to pass sensitive data to a runnable, ensuring secrets remain hidden from tracing, or integrating secret values with runnables. Explains how to pass runtime secrets to runnables using RunnableConfig, allowing certain keys to be hidden from tracing while still being accessible during invocation.
## Tracing

Tracing gives you observability inside your chains and agents, and is vital for diagnosing issues.
[How to: trace with LangChain](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain/): tracing LangChain applications with LangSmith, customizing trace metadata and run names, or integrating LangChain with the LangSmith SDK. Provides guides on integrating LangSmith tracing into LangChain applications, configuring trace metadata and run names, distributed tracing, interoperability between LangChain and LangSmith SDK, and tracing LangChain invocations without environment variables.
|
||||
[How to: add metadata and tags to traces](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain/#add-metadata-and-tags-to-traces): tracing LangChain applications with LangSmith, when logging metadata and tags to traces, and when customizing trace names and IDs. This page provides step-by-step guides on integrating LangSmith tracing with LangChain in Python and JS/TS, covering quick start instructions, selective tracing, logging to specific projects, adding metadata/tags, customizing run names/IDs, accessing run IDs, distributed tracing in Python, and interoperability with the LangSmith SDK.
|
||||
|
||||
[in this section of the LangSmith docs](https://docs.smith.langchain.com/how_to_guides/tracing/): configuring observability for LLM applications, accessing and managing traces, and setting up automation and monitoring. Guides on configuring tracing, using the UI/API for traces, creating dashboards, automating rules/alerts, and gathering human feedback for LLM applications.
|
||||
|
||||
## Integrations
|
||||
|
||||
### Featured Chat Model Providers
|
||||
|
||||
- [ChatAnthropic](https://python.langchain.com/docs/integrations/chat/anthropic/)
|
||||
- [ChatMistralAI](https://python.langchain.com/docs/integrations/chat/mistralai/)
|
||||
- [ChatFireworks](https://python.langchain.com/docs/integrations/chat/fireworks/)
|
||||
- [AzureChatOpenAI](https://python.langchain.com/docs/integrations/chat/azure_chat_openai/)
|
||||
- [ChatOpenAI](https://python.langchain.com/docs/integrations/chat/openai/)
|
||||
- [ChatTogether](https://python.langchain.com/docs/integrations/chat/together/)
|
||||
- [ChatVertexAI](https://python.langchain.com/docs/integrations/chat/google_vertex_ai_palm/)
|
||||
- [ChatGoogleGenerativeAI](https://python.langchain.com/docs/integrations/chat/google_generative_ai/)
|
||||
- [ChatGroq](https://python.langchain.com/docs/integrations/chat/groq/)
|
||||
- [ChatCohere](https://python.langchain.com/docs/integrations/chat/cohere/)
|
||||
- [ChatBedrock](https://python.langchain.com/docs/integrations/chat/bedrock/)
|
||||
- [ChatHuggingFace](https://python.langchain.com/docs/integrations/chat/huggingface/)
|
||||
- [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/)
|
||||
- [ChatOllama](https://python.langchain.com/docs/integrations/chat/ollama/)
|
||||
- [ChatLlamaCpp](https://python.langchain.com/docs/integrations/chat/llamacpp/)
|
||||
- [ChatAI21](https://python.langchain.com/docs/integrations/chat/ai21/)
|
||||
- [ChatUpstage](https://python.langchain.com/docs/integrations/chat/upstage/)
|
||||
- [ChatDatabricks](https://python.langchain.com/docs/integrations/chat/databricks/)
|
||||
- [ChatWatsonx](https://python.langchain.com/docs/integrations/chat/ibm_watsonx/)
|
||||
- [ChatXAI](https://python.langchain.com/docs/integrations/chat/xai/)
|
||||
|
||||
[All](https://python.langchain.com/docs/integrations/chat/): integrating chat models into an application, using chat models for conversational AI tasks, or choosing between different chat model providers. Provides an overview of chat models integrated with LangChain, including OpenAI, Anthropic, Google, and others. Covers key features like tool calling, structured output, JSON mode, local usage, and multimodal support.
|
||||
|
||||
## Glossary
|
||||
|
||||
[AIMessageChunk](https://python.langchain.com/docs/concepts/messages/#aimessagechunk): needing to understand messages and message structure for chat models, when working with chat history, and when integrating with chat model providers. Detailed overview of the different message types used in LangChain for chat models, how messages are structured, and how to convert between LangChain and OpenAI message formats.
|
||||
[AIMessage](https://python.langchain.com/docs/concepts/messages/#aimessage): building chat applications, when implementing tool calling, or when working with chat model outputs. Messages are the units of communication in chat models, representing input, output and metadata; topics include message types, roles, content, metadata, conversation structure, and LangChain's unified message format.
|
||||
[astream_events](https://python.langchain.com/docs/concepts/chat_models/#key-methods): implementing an application that uses a chat model, integrating chat models with other LangChain components, or planning for advanced chat model features like tool calling or structured outputs. This page provides an overview of chat models in LangChain, including their key features, interfaces, integration options, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate limiting and caching.
|
||||
[BaseTool](https://python.langchain.com/docs/concepts/tools/#tool-interface): needing to understand LangChain tools, wanting to create custom tools, or looking for best practices for designing tools. The page covers the tool abstraction in LangChain, which associates a Python function with a schema for name, description, and arguments. It explains how to create tools using the @tool decorator, configure the schema, handle tool artifacts, use special type annotations (InjectedToolArg, RunnableConfig), and provides an overview of toolkits.
|
||||
[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, when working with custom Runnables, and when needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables.
|
||||
[bind_tools](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding): building applications that require an LLM to directly interact with external systems or APIs, when integrating tools or functions into an LLM workflow, or when fine-tuning an LLM to better handle tool calling. This page provides an overview of tool calling, which allows LLMs to invoke external tools or APIs with specific input schemas. It covers key concepts like tool creation, binding tools to LLMs, initiating tool calls from LLMs, and executing the called tools. It also offers guidance on recommended usage and best practices.
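A minimal sketch of tool binding, assuming `langchain-openai` is installed and an API key is set; any chat model that supports tool calling exposes the same `bind_tools` method:

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([multiply])

ai_msg = llm_with_tools.invoke("What is 3 times 12?")
print(ai_msg.tool_calls)  # e.g. [{'name': 'multiply', 'args': {'a': 3, 'b': 12}, ...}]
```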
|
||||
[Caching](https://python.langchain.com/docs/concepts/chat_models/#caching): building chat applications, using LLMs for information extraction, or working with multimodal data. This page discusses chat models, which are language models that operate on messages. It covers chat model interfaces, integrations, features like tool calling and structured outputs, multimodality, context windows, rate limiting, and caching.
|
||||
[Chat models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-chat-models): needing to understand multimodal capabilities in LangChain, when working with multimodal data like images/audio/video, and when determining if a specific LangChain component supports multimodality. Provides an overview of multimodality in chat models, embedding models, and vector stores. Discusses multimodal inputs/outputs for chat models and how they are formatted.
|
||||
[Configurable runnables](https://python.langchain.com/docs/concepts/runnables/#configurable-runnables): trying to understand how to use Runnables, how to configure and compose Runnables, and how to inspect Runnable schemas. The Runnable interface is the foundation for working with LangChain components like language models, output parsers, and retrievers. It defines methods for invoking, batching, streaming, inspecting schemas, configuring, and composing Runnables.
|
||||
[Context window](https://python.langchain.com/docs/concepts/chat_models/#context-window): getting an overview of chat models, understanding the key functionality of chat models, and determining if this concept is relevant for their application. Provides an overview of chat models (LLMs with a chat interface), their features, integrations, key methods like invoking/streaming, handling inputs/outputs, using tools/structured outputs, and advanced topics like rate limiting and caching.
|
||||
[Conversation patterns](https://python.langchain.com/docs/concepts/chat_history/#conversation-patterns): managing conversation history in chatbots, implementing memory for chat models, understanding correct conversation structure. This page explains the concept of chat history, a record of messages exchanged between a user and a chat model. It covers conversation patterns, guidelines for managing chat history to avoid exceeding context window, and the importance of preserving conversation structure.
|
||||
[Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html/): working with document data, retrieving and processing text documents, or integrating with text embedding and vector storage systems. This page provides details on the Document class and its associated methods and properties, as well as examples of how to use it in various scenarios such as document loading, retrieval, and transformation.
|
||||
[Embedding models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-embedding-models): needing to understand multimodal capabilities of LangChain components, wanting to work with non-text data like images/audio/video, or planning to incorporate multimodal data in chat interactions. Provides an overview of multimodality support in chat models (inputs and tools), embedding models, and vector stores; notes current limitations and expected future expansions to handle different data types.
|
||||
[HumanMessage](https://python.langchain.com/docs/concepts/messages/#humanmessage): understanding how to structure conversations with chat models, needing to work with different types of messages (user, assistant, system, tool), or converting between LangChain and OpenAI message formats. Messages are the units of communication used by chat models, representing user input, assistant output, system instructions, and tool results. Key topics include message structure, types (HumanMessage, AIMessage, SystemMessage, ToolMessage), multimodal content support, and integration with OpenAI message format.
|
||||
[InjectedState](https://python.langchain.com/docs/concepts/tools/#injectedstate): learning about LangChain's tools, creating custom tools, or integrating tools with chat models. Provides conceptual overview of tools - encapsulating functions with schemas for models to call. Covers creating tools with @tool decorator, tool interfaces, special type annotations, artifacts, best practices, and toolkits.
|
||||
[InjectedStore](https://python.langchain.com/docs/concepts/tools/#injectedstore): needing to understand how to create and use tools in LangChain, when needing to pass runtime values to tools, and when needing to configure a tool's schema. Tools are a way to encapsulate functions and their schemas to be used with chat models that support tool calling. The page covers the tool interface, creating tools with the @tool decorator, using tools directly, configuring tool schemas, returning artifacts from tools, and special type annotations like InjectedToolArg and RunnableConfig.
|
||||
[InjectedToolArg](https://python.langchain.com/docs/concepts/tools/#injectedtoolarg): trying to understand how to create and use tools in LangChain, when needing to configure tool schemas, and when wanting to return artifacts from tools. Tools provide a way to encapsulate Python functions and schemas to be passed to chat models for execution. The page covers creating tools with the @tool decorator, configuring tool schemas, special type annotations, and tool artifacts.
|
||||
[input and output types](https://python.langchain.com/docs/concepts/runnables/#input-and-output-types): needing to interact with LangChain components, wanting to understand the core Runnable interface, or composing complex chains using LCEL. Covers the Runnable interface that defines a standard way to invoke, batch, stream and inspect components; the RunnableConfig for setting runtime options; creating custom Runnables; configurable Runnables; and how input/output types, schemas, and streaming work.
|
||||
[Integration packages](https://python.langchain.com/docs/concepts/architecture/#integration-packages): determining the overall architecture of LangChain, understanding the different components and packages in the LangChain ecosystem, or deciding which packages to import for a specific use case. This page provides an overview of the different packages that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith, and explains the purpose and contents of each package.
|
||||
[Integration tests](https://python.langchain.com/docs/concepts/testing/#integration-tests): needing guidance on testing LangChain components, understanding different types of tests (unit, integration, standard), or wanting to contribute by adding tests to an integration. Provides an overview of unit tests, integration tests, and standard tests in the LangChain ecosystem, including definitions, examples, and how to implement them for new tools/integrations.
|
||||
[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, when working with custom Runnables, and when needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables.
|
||||
[JSON mode](https://python.langchain.com/docs/concepts/structured_outputs/#json-mode): needing to return structured output that conforms to a specific schema, needing to store model output in a database, or needing to ensure model output matches a predefined format. This page covers how to define an output schema, and techniques like tool calling and JSON mode that allow models to return structured output conforming to that schema, as well as a helper method to streamline the process.
|
||||
[langchain-community](https://python.langchain.com/docs/concepts/architecture/#langchain-community): learning about the structure of LangChain, deploying LangChain applications, or needing an overview of the LangChain ecosystem. This page gives an overview of the different packages, components, and services that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, LangGraph, LangServe, and LangSmith.
|
||||
[langchain-core](https://python.langchain.com/docs/concepts/architecture/#langchain-core): needing an overview of LangChain's architecture, when considering integrating external packages, or when exploring the LangChain ecosystem. Outlines the main components of LangChain (langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, LangSmith) and their roles, providing a high-level architectural overview.
|
||||
[langchain](https://python.langchain.com/docs/concepts/architecture/#langchain): looking to understand the overall architecture of LangChain, when trying to determine what LangChain packages to install, or when wanting an overview of the various LangChain projects. This page outlines the hierarchical structure of the LangChain framework, describing the purpose and contents of key packages like langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith.
|
||||
[langgraph](https://python.langchain.com/docs/concepts/architecture/#langgraph): developing applications with LangChain, seeking to understand the overall architecture of LangChain, planning to contribute to or integrate with LangChain The page outlines the layered architecture of LangChain, describing the core abstraction layer, the main LangChain package, integration packages, community integrations, LangGraph for stateful agents, LangServe for deployment, and LangSmith developer tools
|
||||
[Managing chat history](https://python.langchain.com/docs/concepts/chat_history/#managing-chat-history): understanding and managing chat history, learning about conversation patterns, following correct chat history structure. Explains chat history concept, provides guidelines for managing chat history, discusses conversation patterns involving users, assistants, and tools.
|
||||
[OpenAI format](https://python.langchain.com/docs/concepts/messages/#openai-format): building chat applications, working with chat models, or consuming message streams. This page covers the structure and components of messages used in chat models, including roles, content, usage metadata, and different message types like HumanMessage, AIMessage, and ToolMessage.
|
||||
[Propagation of RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#propagation-of-runnableconfig): learning about the LangChain Runnable interface, working with Runnables in LangChain, or understanding how to configure and execute Runnables. The page covers the Runnable interface in LangChain, including invoking/batching/streaming Runnables, input/output schemas, configuring Runnables, creating custom Runnables, and working with configurable Runnables.
|
||||
[rate-limiting](https://python.langchain.com/docs/concepts/chat_models/#rate-limiting): working with chat models, integrating tool calling or structured outputs, or understanding chat model capabilities. Overview of chat model interface, inputs/outputs, standard parameters; tool calling and structured output support; multimodality; context window; advanced topics like rate limiting and caching.
|
||||
[RemoveMessage](https://python.langchain.com/docs/concepts/messages/#removemessage): needing information on the structure of messages used in conversational AI models, wanting to understand how messages are represented in LangChain, or looking for details on specific message types like SystemMessage, HumanMessage, and AIMessage. Messages are the basic units of communication in conversational AI models, containing a role (e.g. user, assistant), content (text or multimodal data), and metadata; LangChain provides a standardized message format and different message types to represent various components of a conversation.
|
||||
[role](https://python.langchain.com/docs/concepts/messages/#role): understanding how to structure messages for chat models, accessing details about different LangChain message types, or converting between LangChain and OpenAI message formats. Messages are the core unit of communication in chat models, representing input/output content and metadata; LangChain defines SystemMessage, HumanMessage, AIMessage, ToolMessage and others to standardize message format across providers.
|
||||
[RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#runnableconfig): needing to understand the Runnable interface, invoking and configuring Runnables, and creating custom Runnables. The page covers the Runnable interface's core concepts, methods like invoke, batch, and stream, input/output types, configuring Runnables with RunnableConfig, creating custom Runnables from functions, and using configurable Runnables.
|
||||
[Standard parameters for chat models](https://python.langchain.com/docs/concepts/chat_models/#standard-parameters): building applications using chat models, working with chat models for tool calling, structured outputs or multimodal inputs/outputs. Covers overview of chat models, integrations, interfaces, tool calling, structured outputs, multimodality, context window, rate-limiting, and caching of chat models.
|
||||
[Standard tests](https://python.langchain.com/docs/concepts/testing/#standard-tests): needing guidance on testing LangChain components, or wanting to understand the different types of tests used in LangChain. This page discusses unit tests for individual functions, integration tests for validating multiple components working together, and LangChain's standard tests for ensuring consistency across tools and integrations.
|
||||
[stream](https://python.langchain.com/docs/concepts/streaming/): building applications that use streaming, need to display partial results in real-time, or need to provide updates on pipeline or workflow progress. This page covers streaming in LangChain, including what can be streamed in LLM applications, the streaming APIs available, how to write custom data to the stream, and how LangChain automatically enables streaming for chat models in certain cases.
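A short sketch of the synchronous streaming API, assuming `langchain-openai` and a configured API key; the same `.stream()` call works for any chat model:

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
for chunk in llm.stream("Write a haiku about streaming"):
    # Each chunk is an AIMessageChunk carrying a partial piece of the reply.
    print(chunk.content, end="", flush=True)
```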
|
||||
[Tokens](https://python.langchain.com/docs/concepts/tokens/): needing to understand tokens used by LLMs, when dealing with character/token counts, or when working with multimodal inputs. Tokens are the fundamental units processed by language models. A token can represent words, word parts, punctuation, and other units. Models tokenize inputs, process tokens sequentially, and generate new tokens as output. Tokens enable efficient and contextual language processing compared to characters.
|
||||
[Tool artifacts](https://python.langchain.com/docs/concepts/tools/#tool-artifacts): needing to understand what tools are, how to create and use them, and how they integrate with models. Explains what tools are in LangChain, how to create them using the @tool decorator, special type annotations for configuring runtime behavior, how to use tools directly or pass them to chat models, and best practices for designing tools.
|
||||
[Tool binding](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding): determining if tool calling functionality is appropriate for their application, understanding the key concepts and workflow of tool calling, and considering best practices for designing tools. This page covers an overview of tool calling, key concepts like tool creation/binding/calling/execution, recommended usage workflow, details on implementing each step, and best practices for designing effective tools.
|
||||
[@tool](https://python.langchain.com/docs/concepts/tools/#create-tools-using-the-tool-decorator): needing to understand tools in LangChain, when creating custom tools, or when integrating tools into LangChain applications. Provides an overview of tools, how to create and configure tools using the @tool decorator, different tool types (e.g. with artifacts, injected arguments), and best practices for designing tools.
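A minimal sketch of the `@tool` decorator using only `langchain-core`; the function's signature and docstring become the tool's schema:

```python
from langchain_core.tools import tool

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

print(add.name)          # "add"
print(add.description)   # "Add two integers."
print(add.args)          # JSON schema derived from the type hints
print(add.invoke({"a": 2, "b": 3}))  # 5
```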
|
||||
[Toolkits](https://python.langchain.com/docs/concepts/tools/#toolkits): creating custom Python functions to use with LangChain, configuring existing tools, or adding tools to chat models. Explains the tool abstraction for encapsulating Python functions, creating tools with the `@tool` decorator, configuring schemas, handling tool artifacts, special type annotations, and using toolkits that group related tools.
|
||||
[ToolMessage](https://python.langchain.com/docs/concepts/messages/#toolmessage): understanding the communication protocol with chat models, working with chat history management, or understanding LangChain's Message object structure. Messages are the unit of communication in chat models and represent input/output along with metadata; LangChain provides a unified Message format with types like SystemMessage, HumanMessage, AIMessage to handle different roles, content types, tool calls.
|
||||
[Unit tests](https://python.langchain.com/docs/concepts/testing/#unit-tests): developing unit or integration tests, or when contributing to LangChain integrations. Provides an overview of unit tests, integration tests, and standard tests used in the LangChain ecosystem.
|
||||
[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): building applications that need to index and retrieve information based on semantic similarity, integrating vector databases into an application, or exploring advanced vector search and retrieval techniques. Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering.
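A hedged sketch of the core vector store interface, using the in-memory store from `langchain-core` and OpenAI embeddings as an illustrative embedding model (any `Embeddings` implementation works):

```python
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

store = InMemoryVectorStore(embedding=OpenAIEmbeddings())
store.add_documents([
    Document(page_content="LangChain is a framework for building LLM applications."),
    Document(page_content="Vector stores index embeddings for similarity search."),
])

results = store.similarity_search("How do I search by semantic similarity?", k=1)
print(results[0].page_content)
```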
|
||||
[with_structured_output](https://python.langchain.com/docs/concepts/structured_outputs/#structured-output-method): needing to return structured data like JSON or database rows, working with models that support structured output like tools or JSON modes, or integrating with helper functions to streamline structured output. Overview of the structured output concept, schema definition formats like JSON/dicts and Pydantic, model integration methods like tool calling and JSON modes, and LangChain's structured output helper method.
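A brief sketch of `with_structured_output` with a Pydantic schema, assuming `langchain-openai` and an API key; the helper also accepts TypedDicts and JSON schemas:

```python
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class Joke(BaseModel):
    """A joke to tell the user."""
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")

structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")  # -> Joke(setup=..., punchline=...)
```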
|
||||
[with_types](https://python.langchain.com/docs/concepts/runnables/#with_types): learning about the Runnable interface in LangChain, understanding how to work with Runnables, and customizing or configuring Runnables. The page covers the Runnable interface, optimized parallel execution, streaming APIs, input/output types, inspecting schemas, RunnableConfig options, creating custom Runnables from functions, and configurable Runnables.
|
@ -329,6 +329,7 @@ class Chat__ModuleName__(BaseChatModel):
|
||||
additional_kwargs={}, # Used to add additional payload to the message
|
||||
response_metadata={ # Use for response metadata
|
||||
"time_in_seconds": 3,
|
||||
"model_name": self.model_name,
|
||||
},
|
||||
usage_metadata={
|
||||
"input_tokens": ct_input_tokens,
|
||||
@ -391,7 +392,10 @@ class Chat__ModuleName__(BaseChatModel):
|
||||
|
||||
# Let's add some other information (e.g., response metadata)
|
||||
chunk = ChatGenerationChunk(
|
||||
message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3})
|
||||
message=AIMessageChunk(
|
||||
content="",
|
||||
response_metadata={"time_in_sec": 3, "model_name": self.model_name},
|
||||
)
|
||||
)
|
||||
if run_manager:
|
||||
# This is optional in newer versions of LangChain
|
||||
|
@ -345,16 +345,25 @@ class ChatPerplexity(BaseChatModel):
|
||||
if len(chunk["choices"]) == 0:
|
||||
continue
|
||||
choice = chunk["choices"][0]
|
||||
citations = chunk.get("citations", [])
|
||||
|
||||
additional_kwargs = {}
|
||||
if first_chunk:
|
||||
additional_kwargs["citations"] = chunk.get("citations", [])
|
||||
for attr in ["images", "related_questions"]:
|
||||
if attr in chunk:
|
||||
additional_kwargs[attr] = chunk[attr]
|
||||
|
||||
chunk = self._convert_delta_to_message_chunk(
|
||||
choice["delta"], default_chunk_class
|
||||
)
|
||||
|
||||
if isinstance(chunk, AIMessageChunk) and usage_metadata:
|
||||
chunk.usage_metadata = usage_metadata
|
||||
|
||||
if first_chunk:
|
||||
chunk.additional_kwargs |= {"citations": citations}
|
||||
chunk.additional_kwargs |= additional_kwargs
|
||||
first_chunk = False
|
||||
|
||||
finish_reason = choice.get("finish_reason")
|
||||
generation_info = (
|
||||
dict(finish_reason=finish_reason) if finish_reason is not None else None
|
||||
@ -386,9 +395,14 @@ class ChatPerplexity(BaseChatModel):
|
||||
else:
|
||||
usage_metadata = None
|
||||
|
||||
additional_kwargs = {"citations": response.citations}
|
||||
for attr in ["images", "related_questions"]:
|
||||
if hasattr(response, attr):
|
||||
additional_kwargs[attr] = getattr(response, attr)
|
||||
|
||||
message = AIMessage(
|
||||
content=response.choices[0].message.content,
|
||||
additional_kwargs={"citations": response.citations},
|
||||
additional_kwargs=additional_kwargs,
|
||||
usage_metadata=usage_metadata,
|
||||
)
|
||||
return ChatResult(generations=[ChatGeneration(message=message)])
|
||||
|
@ -45,32 +45,19 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
|
||||
if api_version is not None:
|
||||
kwargs["api_version"] = api_version
|
||||
|
||||
if analysis_features is not None:
|
||||
_SUPPORTED_FEATURES = [
|
||||
DocumentAnalysisFeature.OCR_HIGH_RESOLUTION,
|
||||
]
|
||||
|
||||
analysis_features = [
|
||||
DocumentAnalysisFeature(feature) for feature in analysis_features
|
||||
]
|
||||
if any(
|
||||
[feature not in _SUPPORTED_FEATURES for feature in analysis_features]
|
||||
):
|
||||
logger.warning(
|
||||
f"The current supported features are: "
|
||||
f"{[f.value for f in _SUPPORTED_FEATURES]}. "
|
||||
"Using other features may result in unexpected behavior."
|
||||
)
|
||||
|
||||
self.client = DocumentIntelligenceClient(
|
||||
endpoint=api_endpoint,
|
||||
credential=azure_credential or AzureKeyCredential(api_key),
|
||||
headers={"x-ms-useragent": "langchain-parser/1.0.0"},
|
||||
features=analysis_features,
|
||||
**kwargs,
|
||||
)
|
||||
self.api_model = api_model
|
||||
self.mode = mode
|
||||
self.features: Optional[List[DocumentAnalysisFeature]] = None
|
||||
if analysis_features is not None:
|
||||
self.features = [
|
||||
DocumentAnalysisFeature(feature) for feature in analysis_features
|
||||
]
|
||||
assert self.mode in ["single", "page", "markdown"]
|
||||
|
||||
def _generate_docs_page(self, result: Any) -> Iterator[Document]:
|
||||
@ -97,6 +84,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
|
||||
body=file_obj,
|
||||
content_type="application/octet-stream",
|
||||
output_content_format="markdown" if self.mode == "markdown" else "text",
|
||||
features=self.features,
|
||||
)
|
||||
result = poller.result()
|
||||
|
||||
@ -114,6 +102,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
|
||||
self.api_model,
|
||||
body=AnalyzeDocumentRequest(url_source=url),
|
||||
output_content_format="markdown" if self.mode == "markdown" else "text",
|
||||
features=self.features,
|
||||
)
|
||||
result = poller.result()
|
||||
|
||||
@ -131,6 +120,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
|
||||
self.api_model,
|
||||
body=AnalyzeDocumentRequest(bytes_source=bytes_source),
|
||||
output_content_format="markdown" if self.mode == "markdown" else "text",
|
||||
features=self.features,
|
||||
)
|
||||
result = poller.result()
|
||||
|
||||
|
@ -428,6 +428,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
"""
|
||||
if not self.images_parser:
|
||||
return ""
|
||||
import pypdf
|
||||
from PIL import Image
|
||||
|
||||
if "/XObject" not in cast(dict, page["/Resources"]).keys():
|
||||
@ -438,13 +439,18 @@ class PyPDFParser(BaseBlobParser):
|
||||
for obj in xObject:
|
||||
np_image: Any = None
|
||||
if xObject[obj]["/Subtype"] == "/Image":
|
||||
if xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITHOUT_LOSS:
|
||||
img_filter = (
|
||||
xObject[obj]["/Filter"][1:]
|
||||
if type(xObject[obj]["/Filter"]) is pypdf.generic._base.NameObject
|
||||
else xObject[obj]["/Filter"][0][1:]
|
||||
)
|
||||
if img_filter in _PDF_FILTER_WITHOUT_LOSS:
|
||||
height, width = xObject[obj]["/Height"], xObject[obj]["/Width"]
|
||||
|
||||
np_image = np.frombuffer(
|
||||
xObject[obj].get_data(), dtype=np.uint8
|
||||
).reshape(height, width, -1)
|
||||
elif xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITH_LOSS:
|
||||
elif img_filter in _PDF_FILTER_WITH_LOSS:
|
||||
np_image = np.array(Image.open(io.BytesIO(xObject[obj].get_data())))
|
||||
|
||||
else:
|
||||
|
@ -116,3 +116,160 @@ def test_perplexity_stream_includes_citations(mocker: MockerFixture) -> None:
|
||||
assert full.additional_kwargs == {"citations": ["example.com", "example2.com"]}
|
||||
|
||||
patcher.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.requires("openai")
|
||||
def test_perplexity_stream_includes_citations_and_images(mocker: MockerFixture) -> None:
|
||||
"""Test that the stream method includes citations in the additional_kwargs."""
|
||||
llm = ChatPerplexity(
|
||||
model="test",
|
||||
timeout=30,
|
||||
verbose=True,
|
||||
)
|
||||
mock_chunk_0 = {
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Hello ",
|
||||
},
|
||||
"finish_reason": None,
|
||||
}
|
||||
],
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"images": [
|
||||
{
|
||||
"image_url": "mock_image_url",
|
||||
"origin_url": "mock_origin_url",
|
||||
"height": 100,
|
||||
"width": 100,
|
||||
}
|
||||
],
|
||||
}
|
||||
mock_chunk_1 = {
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Perplexity",
|
||||
},
|
||||
"finish_reason": None,
|
||||
}
|
||||
],
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"images": [
|
||||
{
|
||||
"image_url": "mock_image_url",
|
||||
"origin_url": "mock_origin_url",
|
||||
"height": 100,
|
||||
"width": 100,
|
||||
}
|
||||
],
|
||||
}
|
||||
mock_chunks: List[Dict[str, Any]] = [mock_chunk_0, mock_chunk_1]
|
||||
mock_stream = MagicMock()
|
||||
mock_stream.__iter__.return_value = mock_chunks
|
||||
patcher = mocker.patch.object(
|
||||
llm.client.chat.completions, "create", return_value=mock_stream
|
||||
)
|
||||
stream = llm.stream("Hello langchain")
|
||||
full: Optional[BaseMessageChunk] = None
|
||||
for i, chunk in enumerate(stream):
|
||||
full = chunk if full is None else full + chunk
|
||||
assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
|
||||
if i == 0:
|
||||
assert chunk.additional_kwargs["citations"] == [
|
||||
"example.com",
|
||||
"example2.com",
|
||||
]
|
||||
assert chunk.additional_kwargs["images"] == [
|
||||
{
|
||||
"image_url": "mock_image_url",
|
||||
"origin_url": "mock_origin_url",
|
||||
"height": 100,
|
||||
"width": 100,
|
||||
}
|
||||
]
|
||||
else:
|
||||
assert "citations" not in chunk.additional_kwargs
|
||||
assert "images" not in chunk.additional_kwargs
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
assert full.content == "Hello Perplexity"
|
||||
assert full.additional_kwargs == {
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"images": [
|
||||
{
|
||||
"image_url": "mock_image_url",
|
||||
"origin_url": "mock_origin_url",
|
||||
"height": 100,
|
||||
"width": 100,
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
patcher.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.requires("openai")
|
||||
def test_perplexity_stream_includes_citations_and_related_questions(
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Test that the stream method includes citations in the additional_kwargs."""
|
||||
llm = ChatPerplexity(
|
||||
model="test",
|
||||
timeout=30,
|
||||
verbose=True,
|
||||
)
|
||||
mock_chunk_0 = {
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Hello ",
|
||||
},
|
||||
"finish_reason": None,
|
||||
}
|
||||
],
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"related_questions": ["example_question_1", "example_question_2"],
|
||||
}
|
||||
mock_chunk_1 = {
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Perplexity",
|
||||
},
|
||||
"finish_reason": None,
|
||||
}
|
||||
],
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"related_questions": ["example_question_1", "example_question_2"],
|
||||
}
|
||||
mock_chunks: List[Dict[str, Any]] = [mock_chunk_0, mock_chunk_1]
|
||||
mock_stream = MagicMock()
|
||||
mock_stream.__iter__.return_value = mock_chunks
|
||||
patcher = mocker.patch.object(
|
||||
llm.client.chat.completions, "create", return_value=mock_stream
|
||||
)
|
||||
stream = llm.stream("Hello langchain")
|
||||
full: Optional[BaseMessageChunk] = None
|
||||
for i, chunk in enumerate(stream):
|
||||
full = chunk if full is None else full + chunk
|
||||
assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"]
|
||||
if i == 0:
|
||||
assert chunk.additional_kwargs["citations"] == [
|
||||
"example.com",
|
||||
"example2.com",
|
||||
]
|
||||
assert chunk.additional_kwargs["related_questions"] == [
|
||||
"example_question_1",
|
||||
"example_question_2",
|
||||
]
|
||||
else:
|
||||
assert "citations" not in chunk.additional_kwargs
|
||||
assert "related_questions" not in chunk.additional_kwargs
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
assert full.content == "Hello Perplexity"
|
||||
assert full.additional_kwargs == {
|
||||
"citations": ["example.com", "example2.com"],
|
||||
"related_questions": ["example_question_1", "example_question_2"],
|
||||
}
|
||||
|
||||
patcher.assert_called_once()
|
||||
|
@ -24,7 +24,6 @@ def test_doc_intelligence(mock_credential: MagicMock, mock_client: MagicMock) ->
|
||||
headers={
|
||||
"x-ms-useragent": "langchain-parser/1.0.0",
|
||||
},
|
||||
features=None,
|
||||
)
|
||||
assert parser.client == mock_client()
|
||||
assert parser.api_model == "prebuilt-layout"
|
||||
@ -51,7 +50,6 @@ def test_doc_intelligence_with_analysis_features(
|
||||
headers={
|
||||
"x-ms-useragent": "langchain-parser/1.0.0",
|
||||
},
|
||||
features=analysis_features,
|
||||
)
|
||||
assert parser.client == mock_client()
|
||||
assert parser.api_model == "prebuilt-layout"
|
||||
|
@ -1,4 +1,5 @@
|
||||
version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.9, <4.0"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy'",
|
||||
@ -1531,6 +1532,7 @@ requires-dist = [
|
||||
{ name = "requests", specifier = ">=2,<3" },
|
||||
{ name = "sqlalchemy", specifier = ">=1.4,<3" },
|
||||
]
|
||||
provides-extras = ["community", "anthropic", "openai", "azure-ai", "cohere", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
codespell = [{ name = "codespell", specifier = ">=2.2.0,<3.0.0" }]
|
||||
@ -1745,7 +1747,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.45"
|
||||
version = "0.3.47"
|
||||
source = { editable = "../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -1803,7 +1805,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.14"
|
||||
version = "0.3.15"
|
||||
source = { editable = "../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
|
@ -143,7 +143,7 @@ def beta(
|
||||
obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc]
|
||||
warn_if_direct_instance
|
||||
)
|
||||
return cast(T, obj)
|
||||
return cast("T", obj)
|
||||
|
||||
elif isinstance(obj, property):
|
||||
# note(erick): this block doesn't seem to be used?
|
||||
@ -217,7 +217,7 @@ def beta(
|
||||
"""
|
||||
wrapper = functools.wraps(wrapped)(wrapper)
|
||||
wrapper.__doc__ = new_doc
|
||||
return cast(T, wrapper)
|
||||
return cast("T", wrapper)
|
||||
|
||||
old_doc = inspect.cleandoc(old_doc or "").strip("\n") or ""
|
||||
components = [message, addendum]
|
||||
@ -228,7 +228,7 @@ def beta(
|
||||
finalized = finalize(awarning_emitting_wrapper, new_doc)
|
||||
else:
|
||||
finalized = finalize(warning_emitting_wrapper, new_doc)
|
||||
return cast(T, finalized)
|
||||
return cast("T", finalized)
|
||||
|
||||
return beta
|
||||
|
||||
|
@ -216,7 +216,7 @@ def deprecated(
|
||||
obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc]
|
||||
warn_if_direct_instance
|
||||
)
|
||||
return cast(T, obj)
|
||||
return cast("T", obj)
|
||||
|
||||
elif isinstance(obj, FieldInfoV1):
|
||||
wrapped = None
|
||||
@ -229,7 +229,7 @@ def deprecated(
|
||||
|
||||
def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
|
||||
return cast(
|
||||
T,
|
||||
"T",
|
||||
FieldInfoV1(
|
||||
default=obj.default,
|
||||
default_factory=obj.default_factory,
|
||||
@ -250,7 +250,7 @@ def deprecated(
|
||||
|
||||
def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
|
||||
return cast(
|
||||
T,
|
||||
"T",
|
||||
FieldInfoV2(
|
||||
default=obj.default,
|
||||
default_factory=obj.default_factory,
|
||||
@ -264,7 +264,7 @@ def deprecated(
|
||||
if not _obj_type:
|
||||
_obj_type = "attribute"
|
||||
wrapped = None
|
||||
_name = _name or cast(Union[type, Callable], obj.fget).__qualname__
|
||||
_name = _name or cast("Union[type, Callable]", obj.fget).__qualname__
|
||||
old_doc = obj.__doc__
|
||||
|
||||
class _DeprecatedProperty(property):
|
||||
@ -311,14 +311,14 @@ def deprecated(
|
||||
def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
|
||||
"""Finalize the property."""
|
||||
return cast(
|
||||
T,
|
||||
"T",
|
||||
_DeprecatedProperty(
|
||||
fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
|
||||
),
|
||||
)
|
||||
|
||||
else:
|
||||
_name = _name or cast(Union[type, Callable], obj).__qualname__
|
||||
_name = _name or cast("Union[type, Callable]", obj).__qualname__
|
||||
if not _obj_type:
|
||||
# edge case: when a function is within another function
|
||||
# within a test, this will call it a "method" not a "function"
|
||||
@ -338,7 +338,7 @@ def deprecated(
|
||||
"""
|
||||
wrapper = functools.wraps(wrapped)(wrapper)
|
||||
wrapper.__doc__ = new_doc
|
||||
return cast(T, wrapper)
|
||||
return cast("T", wrapper)
|
||||
|
||||
old_doc = inspect.cleandoc(old_doc or "").strip("\n")
|
||||
|
||||
@ -391,7 +391,7 @@ def deprecated(
|
||||
finalized = finalize(awarning_emitting_wrapper, new_doc)
|
||||
else:
|
||||
finalized = finalize(warning_emitting_wrapper, new_doc)
|
||||
return cast(T, finalized)
|
||||
return cast("T", finalized)
|
||||
|
||||
return deprecate
|
||||
|
||||
|
@ -43,6 +43,10 @@ from langchain_core.callbacks.manager import (
|
||||
)
|
||||
from langchain_core.callbacks.stdout import StdOutCallbackHandler
|
||||
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
from langchain_core.callbacks.usage import (
|
||||
UsageMetadataCallbackHandler,
|
||||
get_usage_metadata_callback,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"dispatch_custom_event",
|
||||
@ -77,4 +81,6 @@ __all__ = [
|
||||
"StdOutCallbackHandler",
|
||||
"StreamingStdOutCallbackHandler",
|
||||
"FileCallbackHandler",
|
||||
"UsageMetadataCallbackHandler",
|
||||
"get_usage_metadata_callback",
|
||||
]
|
||||
|
@ -31,7 +31,7 @@ class FileCallbackHandler(BaseCallbackHandler):
|
||||
mode: The mode to open the file in. Defaults to "a".
|
||||
color: The color to use for the text. Defaults to None.
|
||||
"""
|
||||
self.file = cast(TextIO, Path(filename).open(mode, encoding="utf-8")) # noqa: SIM115
|
||||
self.file = cast("TextIO", Path(filename).open(mode, encoding="utf-8")) # noqa: SIM115
|
||||
self.color = color
|
||||
|
||||
def __del__(self) -> None:
|
||||
|
@ -232,7 +232,7 @@ def shielded(func: Func) -> Func:
|
||||
async def wrapped(*args: Any, **kwargs: Any) -> Any:
|
||||
return await asyncio.shield(func(*args, **kwargs))
|
||||
|
||||
return cast(Func, wrapped)
|
||||
return cast("Func", wrapped)
|
||||
|
||||
|
||||
def handle_event(
|
||||
@ -308,7 +308,7 @@ def handle_event(
|
||||
# The solution is to create a new loop in a new thread.
|
||||
with ThreadPoolExecutor(1) as executor:
|
||||
executor.submit(
|
||||
cast(Callable, copy_context().run), _run_coros, coros
|
||||
cast("Callable", copy_context().run), _run_coros, coros
|
||||
).result()
|
||||
else:
|
||||
_run_coros(coros)
|
||||
@ -362,7 +362,7 @@ async def _ahandle_event_for_handler(
|
||||
await asyncio.get_event_loop().run_in_executor(
|
||||
None,
|
||||
cast(
|
||||
Callable,
|
||||
"Callable",
|
||||
functools.partial(
|
||||
copy_context().run, event, *args, **kwargs
|
||||
),
|
||||
@ -2395,7 +2395,7 @@ def _configure(
|
||||
run_tree.trace_id,
|
||||
run_tree.dotted_order,
|
||||
)
|
||||
handler.run_map[str(run_tree.id)] = cast(Run, run_tree)
|
||||
handler.run_map[str(run_tree.id)] = cast("Run", run_tree)
|
||||
for var, inheritable, handler_class, env_var in _configure_hooks:
|
||||
create_one = (
|
||||
env_var is not None
|
||||
@ -2403,7 +2403,9 @@ def _configure(
|
||||
and handler_class is not None
|
||||
)
|
||||
if var.get() is not None or create_one:
|
||||
var_handler = var.get() or cast(type[BaseCallbackHandler], handler_class)()
|
||||
var_handler = (
|
||||
var.get() or cast("type[BaseCallbackHandler]", handler_class)()
|
||||
)
|
||||
if handler_class is None:
|
||||
if not any(
|
||||
handler is var_handler # direct pointer comparison
|
||||
|
libs/core/langchain_core/callbacks/usage.py (new file, +136)
@ -0,0 +1,136 @@
|
||||
"""Callback Handler that tracks AIMessage.usage_metadata."""
|
||||
|
||||
import threading
|
||||
from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar
|
||||
from typing import Any, Optional
|
||||
|
||||
from langchain_core._api import beta
|
||||
from langchain_core.callbacks import BaseCallbackHandler
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages.ai import UsageMetadata, add_usage
|
||||
from langchain_core.outputs import ChatGeneration, LLMResult
|
||||
|
||||
|
||||
@beta()
|
||||
class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
"""Callback Handler that tracks AIMessage.usage_metadata.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.chat_models import init_chat_model
|
||||
from langchain_core.callbacks import UsageMetadataCallbackHandler
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
|
||||
result_2 = llm_2.invoke("Hello", config={"callbacks": [callback]})
|
||||
callback.usage_metadata
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{'gpt-4o-mini-2024-07-18': {'input_tokens': 8,
|
||||
'output_tokens': 10,
|
||||
'total_tokens': 18,
|
||||
'input_token_details': {'audio': 0, 'cache_read': 0},
|
||||
'output_token_details': {'audio': 0, 'reasoning': 0}},
|
||||
'claude-3-5-haiku-20241022': {'input_tokens': 8,
|
||||
'output_tokens': 21,
|
||||
'total_tokens': 29,
|
||||
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
|
||||
|
||||
.. versionadded:: 0.3.49
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._lock = threading.Lock()
|
||||
self.usage_metadata: dict[str, UsageMetadata] = {}
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return str(self.usage_metadata)
|
||||
|
||||
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
|
||||
"""Collect token usage."""
|
||||
# Check for usage_metadata (langchain-core >= 0.2.2)
|
||||
try:
|
||||
generation = response.generations[0][0]
|
||||
except IndexError:
|
||||
generation = None
|
||||
|
||||
usage_metadata = None
|
||||
model_name = None
|
||||
if isinstance(generation, ChatGeneration):
|
||||
try:
|
||||
message = generation.message
|
||||
if isinstance(message, AIMessage):
|
||||
usage_metadata = message.usage_metadata
|
||||
model_name = message.response_metadata.get("model_name")
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# update shared state behind lock
|
||||
if usage_metadata and model_name:
|
||||
with self._lock:
|
||||
if model_name not in self.usage_metadata:
|
||||
self.usage_metadata[model_name] = usage_metadata
|
||||
else:
|
||||
self.usage_metadata[model_name] = add_usage(
|
||||
self.usage_metadata[model_name], usage_metadata
|
||||
)
|
||||
|
||||
|
||||
@contextmanager
|
||||
@beta()
|
||||
def get_usage_metadata_callback(
|
||||
name: str = "usage_metadata_callback",
|
||||
) -> Generator[UsageMetadataCallbackHandler, None, None]:
|
||||
"""Get context manager for tracking usage metadata across chat model calls using
|
||||
``AIMessage.usage_metadata``.
|
||||
|
||||
Args:
|
||||
name (str): The name of the context variable. Defaults to
|
||||
``"usage_metadata_callback"``.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.chat_models import init_chat_model
|
||||
from langchain_core.callbacks import get_usage_metadata_callback
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
|
||||
with get_usage_metadata_callback() as cb:
|
||||
llm_1.invoke("Hello")
|
||||
llm_2.invoke("Hello")
|
||||
print(cb.usage_metadata)
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
{'gpt-4o-mini-2024-07-18': {'input_tokens': 8,
|
||||
'output_tokens': 10,
|
||||
'total_tokens': 18,
|
||||
'input_token_details': {'audio': 0, 'cache_read': 0},
|
||||
'output_token_details': {'audio': 0, 'reasoning': 0}},
|
||||
'claude-3-5-haiku-20241022': {'input_tokens': 8,
|
||||
'output_tokens': 21,
|
||||
'total_tokens': 29,
|
||||
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
|
||||
|
||||
.. versionadded:: 0.3.49
|
||||
"""
|
||||
from langchain_core.tracers.context import register_configure_hook
|
||||
|
||||
usage_metadata_callback_var: ContextVar[Optional[UsageMetadataCallbackHandler]] = (
|
||||
ContextVar(name, default=None)
|
||||
)
|
||||
register_configure_hook(usage_metadata_callback_var, True)
|
||||
cb = UsageMetadataCallbackHandler()
|
||||
usage_metadata_callback_var.set(cb)
|
||||
yield cb
|
||||
usage_metadata_callback_var.set(None)
|
@ -136,7 +136,7 @@ class Blob(BaseMedia):
|
||||
case that value will be used instead.
|
||||
"""
|
||||
if self.metadata and "source" in self.metadata:
|
||||
return cast(Optional[str], self.metadata["source"])
|
||||
return cast("Optional[str]", self.metadata["source"])
|
||||
return str(self.path) if self.path else None
|
||||
|
||||
@model_validator(mode="before")
|
||||
|
@ -395,7 +395,7 @@ def index(
|
||||
if cleanup == "scoped_full":
|
||||
scoped_full_cleanup_source_ids.add(source_id)
|
||||
# source ids cannot be None after for loop above.
|
||||
source_ids = cast(Sequence[str], source_ids) # type: ignore[assignment]
|
||||
source_ids = cast("Sequence[str]", source_ids) # type: ignore[assignment]
|
||||
|
||||
exists_batch = record_manager.exists([doc.uid for doc in hashed_docs])
|
||||
|
||||
@ -461,7 +461,7 @@ def index(
|
||||
)
|
||||
raise AssertionError(msg)
|
||||
|
||||
_source_ids = cast(Sequence[str], source_ids)
|
||||
_source_ids = cast("Sequence[str]", source_ids)
|
||||
|
||||
uids_to_delete = record_manager.list_keys(
|
||||
group_ids=_source_ids, before=index_start_dt
|
||||
@ -473,7 +473,9 @@ def index(
|
||||
record_manager.delete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
|
||||
if cleanup == "full" or cleanup == "scoped_full":
|
||||
if cleanup == "full" or (
|
||||
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
|
||||
):
|
||||
delete_group_ids: Optional[Sequence[str]] = None
|
||||
if cleanup == "scoped_full":
|
||||
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
||||
@ -708,7 +710,7 @@ async def aindex(
|
||||
if cleanup == "scoped_full":
|
||||
scoped_full_cleanup_source_ids.add(source_id)
|
||||
# source ids cannot be None after for loop above.
|
||||
source_ids = cast(Sequence[str], source_ids)
|
||||
source_ids = cast("Sequence[str]", source_ids)
|
||||
|
||||
exists_batch = await record_manager.aexists([doc.uid for doc in hashed_docs])
|
||||
|
||||
@ -774,7 +776,7 @@ async def aindex(
|
||||
)
|
||||
raise AssertionError(msg)
|
||||
|
||||
_source_ids = cast(Sequence[str], source_ids)
|
||||
_source_ids = cast("Sequence[str]", source_ids)
|
||||
|
||||
uids_to_delete = await record_manager.alist_keys(
|
||||
group_ids=_source_ids, before=index_start_dt
|
||||
@ -786,7 +788,9 @@ async def aindex(
|
||||
await record_manager.adelete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
|
||||
if cleanup == "full" or cleanup == "scoped_full":
|
||||
if cleanup == "full" or (
|
||||
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
|
||||
):
|
||||
delete_group_ids: Optional[Sequence[str]] = None
|
||||
if cleanup == "scoped_full":
|
||||
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
||||
|
@ -41,7 +41,7 @@ class InMemoryDocumentIndex(DocumentIndex):
|
||||
id_ = item.id
|
||||
|
||||
self.store[id_] = item_
|
||||
ok_ids.append(cast(str, item_.id))
|
||||
ok_ids.append(cast("str", item_.id))
|
||||
|
||||
return UpsertResponse(succeeded=ok_ids, failed=[])
|
||||
|
||||
|
@ -303,7 +303,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
) -> BaseMessage:
|
||||
config = ensure_config(config)
|
||||
return cast(
|
||||
ChatGeneration,
|
||||
"ChatGeneration",
|
||||
self.generate_prompt(
|
||||
[self._convert_input(input)],
|
||||
stop=stop,
|
||||
@ -335,7 +335,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
run_id=config.pop("run_id", None),
|
||||
**kwargs,
|
||||
)
|
||||
return cast(ChatGeneration, llm_result.generations[0][0]).message
|
||||
return cast("ChatGeneration", llm_result.generations[0][0]).message
|
||||
|
||||
def _should_stream(
|
||||
self,
|
||||
@ -383,7 +383,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
|
||||
# model doesn't implement streaming, so use default implementation
|
||||
yield cast(
|
||||
BaseMessageChunk, self.invoke(input, config=config, stop=stop, **kwargs)
|
||||
"BaseMessageChunk",
|
||||
self.invoke(input, config=config, stop=stop, **kwargs),
|
||||
)
|
||||
else:
|
||||
config = ensure_config(config)
|
||||
@ -430,7 +431,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
chunk.message.id = f"run-{run_manager.run_id}"
|
||||
chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
|
||||
run_manager.on_llm_new_token(
|
||||
cast(str, chunk.message.content), chunk=chunk
|
||||
cast("str", chunk.message.content), chunk=chunk
|
||||
)
|
||||
yield chunk.message
|
||||
if generation is None:
|
||||
@ -464,7 +465,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
|
||||
# No async or sync stream is implemented, so fall back to ainvoke
|
||||
yield cast(
|
||||
BaseMessageChunk,
|
||||
"BaseMessageChunk",
|
||||
await self.ainvoke(input, config=config, stop=stop, **kwargs),
|
||||
)
|
||||
return
|
||||
@ -518,7 +519,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
chunk.message.id = f"run-{run_manager.run_id}"
|
||||
chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
|
||||
await run_manager.on_llm_new_token(
|
||||
cast(str, chunk.message.content), chunk=chunk
|
||||
cast("str", chunk.message.content), chunk=chunk
|
||||
)
|
||||
yield chunk.message
|
||||
if generation is None:
|
||||
@ -899,7 +900,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
if chunk.message.id is None:
|
||||
chunk.message.id = f"run-{run_manager.run_id}"
|
||||
run_manager.on_llm_new_token(
|
||||
cast(str, chunk.message.content), chunk=chunk
|
||||
cast("str", chunk.message.content), chunk=chunk
|
||||
)
|
||||
chunks.append(chunk)
|
||||
result = generate_from_stream(iter(chunks))
|
||||
@ -972,7 +973,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
if chunk.message.id is None:
|
||||
chunk.message.id = f"run-{run_manager.run_id}"
|
||||
await run_manager.on_llm_new_token(
|
||||
cast(str, chunk.message.content), chunk=chunk
|
||||
cast("str", chunk.message.content), chunk=chunk
|
||||
)
|
||||
chunks.append(chunk)
|
||||
result = generate_from_stream(iter(chunks))
|
||||
@ -1307,7 +1308,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
)
|
||||
if isinstance(schema, type) and is_basemodel_subclass(schema):
|
||||
output_parser: OutputParserLike = PydanticToolsParser(
|
||||
tools=[cast(TypeBaseModel, schema)], first_tool_only=True
|
||||
tools=[cast("TypeBaseModel", schema)], first_tool_only=True
|
||||
)
|
||||
else:
|
||||
key_name = convert_to_openai_tool(schema)["function"]["name"]
|
||||
|
@ -262,7 +262,7 @@ class GenericFakeChatModel(BaseChatModel):
|
||||
msg = "Expected content to be a string."
|
||||
raise ValueError(msg)
|
||||
|
||||
content_chunks = cast(list[str], re.split(r"(\s)", content))
|
||||
content_chunks = cast("list[str]", re.split(r"(\s)", content))
|
||||
|
||||
for token in content_chunks:
|
||||
chunk = ChatGenerationChunk(
|
||||
@ -280,7 +280,7 @@ class GenericFakeChatModel(BaseChatModel):
|
||||
for fkey, fvalue in value.items():
|
||||
if isinstance(fvalue, str):
|
||||
# Break function call by `,`
|
||||
fvalue_chunks = cast(list[str], re.split(r"(,)", fvalue))
|
||||
fvalue_chunks = cast("list[str]", re.split(r"(,)", fvalue))
|
||||
for fvalue_chunk in fvalue_chunks:
|
||||
chunk = ChatGenerationChunk(
|
||||
message=AIMessageChunk(
|
||||
|
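The two `re.split` casts above rely on a detail worth spelling out: splitting on a capturing group keeps the separators in the result, which is how the fake chat model can stream whitespace and comma tokens back out. A small illustration (the sample strings are mine):

```python
import re

# A capturing group in the pattern makes re.split keep the separators.
print(re.split(r"(\s)", "hello brave world"))
# ['hello', ' ', 'brave', ' ', 'world']

print(re.split(r"(,)", "a=1,b=2"))
# ['a=1', ',', 'b=2']
```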
@ -449,7 +449,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
return [g[0].text for g in llm_result.generations]
|
||||
except Exception as e:
|
||||
if return_exceptions:
|
||||
return cast(list[str], [e for _ in inputs])
|
||||
return cast("list[str]", [e for _ in inputs])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -495,7 +495,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
return [g[0].text for g in llm_result.generations]
|
||||
except Exception as e:
|
||||
if return_exceptions:
|
||||
return cast(list[str], [e for _ in inputs])
|
||||
return cast("list[str]", [e for _ in inputs])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -901,13 +901,15 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
):
|
||||
msg = "run_name must be a list of the same length as prompts"
|
||||
raise ValueError(msg)
|
||||
callbacks = cast(list[Callbacks], callbacks)
|
||||
tags_list = cast(list[Optional[list[str]]], tags or ([None] * len(prompts)))
|
||||
callbacks = cast("list[Callbacks]", callbacks)
|
||||
tags_list = cast(
|
||||
"list[Optional[list[str]]]", tags or ([None] * len(prompts))
|
||||
)
|
||||
metadata_list = cast(
|
||||
list[Optional[dict[str, Any]]], metadata or ([{}] * len(prompts))
|
||||
"list[Optional[dict[str, Any]]]", metadata or ([{}] * len(prompts))
|
||||
)
|
||||
run_name_list = run_name or cast(
|
||||
list[Optional[str]], ([None] * len(prompts))
|
||||
"list[Optional[str]]", ([None] * len(prompts))
|
||||
)
|
||||
callback_managers = [
|
||||
CallbackManager.configure(
|
||||
@ -925,16 +927,16 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
# We've received a single callbacks arg to apply to all inputs
|
||||
callback_managers = [
|
||||
CallbackManager.configure(
|
||||
cast(Callbacks, callbacks),
|
||||
cast("Callbacks", callbacks),
|
||||
self.callbacks,
|
||||
self.verbose,
|
||||
cast(list[str], tags),
|
||||
cast("list[str]", tags),
|
||||
self.tags,
|
||||
cast(dict[str, Any], metadata),
|
||||
cast("dict[str, Any]", metadata),
|
||||
self.metadata,
|
||||
)
|
||||
] * len(prompts)
|
||||
run_name_list = [cast(Optional[str], run_name)] * len(prompts)
|
||||
run_name_list = [cast("Optional[str]", run_name)] * len(prompts)
|
||||
run_ids_list = self._get_run_ids_list(run_id, prompts)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
@ -1143,13 +1145,15 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
):
|
||||
msg = "run_name must be a list of the same length as prompts"
|
||||
raise ValueError(msg)
|
||||
callbacks = cast(list[Callbacks], callbacks)
|
||||
tags_list = cast(list[Optional[list[str]]], tags or ([None] * len(prompts)))
|
||||
callbacks = cast("list[Callbacks]", callbacks)
|
||||
tags_list = cast(
|
||||
"list[Optional[list[str]]]", tags or ([None] * len(prompts))
|
||||
)
|
||||
metadata_list = cast(
|
||||
list[Optional[dict[str, Any]]], metadata or ([{}] * len(prompts))
|
||||
"list[Optional[dict[str, Any]]]", metadata or ([{}] * len(prompts))
|
||||
)
|
||||
run_name_list = run_name or cast(
|
||||
list[Optional[str]], ([None] * len(prompts))
|
||||
"list[Optional[str]]", ([None] * len(prompts))
|
||||
)
|
||||
callback_managers = [
|
||||
AsyncCallbackManager.configure(
|
||||
@ -1167,16 +1171,16 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
# We've received a single callbacks arg to apply to all inputs
|
||||
callback_managers = [
|
||||
AsyncCallbackManager.configure(
|
||||
cast(Callbacks, callbacks),
|
||||
cast("Callbacks", callbacks),
|
||||
self.callbacks,
|
||||
self.verbose,
|
||||
cast(list[str], tags),
|
||||
cast("list[str]", tags),
|
||||
self.tags,
|
||||
cast(dict[str, Any], metadata),
|
||||
cast("dict[str, Any]", metadata),
|
||||
self.metadata,
|
||||
)
|
||||
] * len(prompts)
|
||||
run_name_list = [cast(Optional[str], run_name)] * len(prompts)
|
||||
run_name_list = [cast("Optional[str]", run_name)] * len(prompts)
|
||||
run_ids_list = self._get_run_ids_list(run_id, prompts)
|
||||
params = self.dict()
|
||||
params["stop"] = stop
|
||||
|
@ -237,7 +237,7 @@ class Serializable(BaseModel, ABC):
|
||||
raise ValueError(msg)
|
||||
|
||||
# Get a reference to self bound to each class in the MRO
|
||||
this = cast(Serializable, self if cls is None else super(cls, self))
|
||||
this = cast("Serializable", self if cls is None else super(cls, self))
|
||||
|
||||
secrets.update(this.lc_secrets)
|
||||
# Now also add the aliases for the secrets
|
||||
|
@ -500,14 +500,14 @@ def add_usage(
|
||||
if not (left or right):
|
||||
return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)
|
||||
if not (left and right):
|
||||
return cast(UsageMetadata, left or right)
|
||||
return cast("UsageMetadata", left or right)
|
||||
|
||||
return UsageMetadata(
|
||||
**cast(
|
||||
UsageMetadata,
|
||||
"UsageMetadata",
|
||||
_dict_int_op(
|
||||
cast(dict, left),
|
||||
cast(dict, right),
|
||||
cast("dict", left),
|
||||
cast("dict", right),
|
||||
operator.add,
|
||||
),
|
||||
)
|
||||
@ -557,14 +557,14 @@ def subtract_usage(
|
||||
if not (left or right):
|
||||
return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)
|
||||
if not (left and right):
|
||||
return cast(UsageMetadata, left or right)
|
||||
return cast("UsageMetadata", left or right)
|
||||
|
||||
return UsageMetadata(
|
||||
**cast(
|
||||
UsageMetadata,
|
||||
"UsageMetadata",
|
||||
_dict_int_op(
|
||||
cast(dict, left),
|
||||
cast(dict, right),
|
||||
cast("dict", left),
|
||||
cast("dict", right),
|
||||
(lambda le, ri: max(le - ri, 0)),
|
||||
),
|
||||
)
|
||||
|
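For context on the two helpers touched above: `add_usage` and `subtract_usage` combine `UsageMetadata` dictionaries key by key, with subtraction floored at zero. A rough usage sketch, assuming both helpers are importable from `langchain_core.messages.ai` (the new test file later in this diff imports `add_usage` from there):

```python
from langchain_core.messages.ai import UsageMetadata, add_usage, subtract_usage

left = UsageMetadata(input_tokens=1, output_tokens=2, total_tokens=3)
right = UsageMetadata(input_tokens=4, output_tokens=5, total_tokens=9)

combined = add_usage(left, right)
# Counters are summed per key: 5 input, 7 output, 12 total.
print(combined["input_tokens"], combined["output_tokens"], combined["total_tokens"])

remaining = subtract_usage(right, left)
# Per-key subtraction, clamped at zero: 3 input, 3 output, 6 total.
print(remaining["total_tokens"])
```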
@ -160,13 +160,13 @@ def merge_content(
|
||||
if isinstance(merged, str):
|
||||
# If the next chunk is also a string, then merge them naively
|
||||
if isinstance(content, str):
|
||||
merged = cast(str, merged) + content
|
||||
merged = cast("str", merged) + content
|
||||
# If the next chunk is a list, add the current to the start of the list
|
||||
else:
|
||||
merged = [merged] + content # type: ignore
|
||||
elif isinstance(content, list):
|
||||
# If both are lists
|
||||
merged = merge_lists(cast(list, merged), content) # type: ignore
|
||||
merged = merge_lists(cast("list", merged), content) # type: ignore
|
||||
# If the first content is a list, and the second content is a string
|
||||
else:
|
||||
# If the last element of the first content is a string
|
||||
|
@ -504,7 +504,7 @@ def filter_messages(
|
||||
)
|
||||
]
|
||||
|
||||
msg = msg.model_copy(
|
||||
msg = msg.model_copy( # noqa: PLW2901
|
||||
update={"tool_calls": tool_calls, "content": content}
|
||||
)
|
||||
elif (
|
||||
@ -908,7 +908,7 @@ def trim_messages(
|
||||
try:
|
||||
from langchain_text_splitters import TextSplitter
|
||||
except ImportError:
|
||||
text_splitter_fn: Optional[Callable] = cast(Optional[Callable], text_splitter)
|
||||
text_splitter_fn: Optional[Callable] = cast("Optional[Callable]", text_splitter)
|
||||
else:
|
||||
if isinstance(text_splitter, TextSplitter):
|
||||
text_splitter_fn = text_splitter.split_text
|
||||
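Since the `trim_messages` internals are touched above, a short reminder of the public behaviour may help. This is a sketch under the documented API with made-up message content; `token_counter=len` makes every message count as one token, so the budget keeps only the most recent messages:

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages

history = [
    SystemMessage("You are terse."),
    HumanMessage("hi"),
    AIMessage("hello"),
    HumanMessage("how are you?"),
]

# Keep the most recent messages that fit the budget; token_counter=len counts
# each message as a single "token".
trimmed = trim_messages(
    history,
    strategy="last",
    token_counter=len,
    max_tokens=3,
    include_system=True,
)
for message in trimmed:
    print(type(message).__name__, message.content)
```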
@ -1148,7 +1148,7 @@ def convert_to_openai_messages(
|
||||
raise ValueError(err)
|
||||
if not any(
|
||||
tool_call["id"] == block["id"]
|
||||
for tool_call in cast(AIMessage, message).tool_calls
|
||||
for tool_call in cast("AIMessage", message).tool_calls
|
||||
):
|
||||
oai_msg["tool_calls"] = oai_msg.get("tool_calls", [])
|
||||
oai_msg["tool_calls"].append(
|
||||
|
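`convert_to_openai_messages`, whose tool-call branch is adjusted above, maps LangChain message objects onto OpenAI-style role/content dicts. A minimal sketch with invented content:

```python
from langchain_core.messages import AIMessage, HumanMessage, convert_to_openai_messages

oai_messages = convert_to_openai_messages(
    [HumanMessage("hi"), AIMessage("hello!")]
)
print(oai_messages)
# Roughly: [{'role': 'user', 'content': 'hi'},
#           {'role': 'assistant', 'content': 'hello!'}]
```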
@ -125,5 +125,11 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
||||
|
||||
# For backwards compatibility
|
||||
SimpleJsonOutputParser = JsonOutputParser
|
||||
parse_partial_json = parse_partial_json
|
||||
parse_and_check_json_markdown = parse_and_check_json_markdown
|
||||
|
||||
|
||||
__all__ = [
|
||||
"JsonOutputParser",
|
||||
"SimpleJsonOutputParser", # For backwards compatibility
|
||||
"parse_partial_json", # For backwards compatibility
|
||||
"parse_and_check_json_markdown", # For backwards compatibility
|
||||
]
|
||||
|
@ -73,9 +73,10 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
chunk = chunk_content
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
@ -105,9 +106,10 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
chunk = chunk_content
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
|
@ -124,7 +124,7 @@ class ImagePromptValue(PromptValue):
|
||||
|
||||
def to_messages(self) -> list[BaseMessage]:
|
||||
"""Return prompt (image URL) as messages."""
|
||||
return [HumanMessage(content=[cast(dict, self.image_url)])]
|
||||
return [HumanMessage(content=[cast("dict", self.image_url)])]
|
||||
|
||||
|
||||
class ChatPromptValueConcrete(ChatPromptValue):
|
||||
|
@ -530,14 +530,14 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
|
||||
if isinstance(tmpl, str):
|
||||
text: str = tmpl
|
||||
else:
|
||||
text = cast(_TextTemplateParam, tmpl)["text"] # type: ignore[assignment]
|
||||
text = cast("_TextTemplateParam", tmpl)["text"] # type: ignore[assignment]
|
||||
prompt.append(
|
||||
PromptTemplate.from_template(
|
||||
text, template_format=template_format
|
||||
)
|
||||
)
|
||||
elif isinstance(tmpl, dict) and "image_url" in tmpl:
|
||||
img_template = cast(_ImageTemplateParam, tmpl)["image_url"]
|
||||
img_template = cast("_ImageTemplateParam", tmpl)["image_url"]
|
||||
input_variables = []
|
||||
if isinstance(img_template, str):
|
||||
vars = get_template_variables(img_template, template_format)
|
||||
@ -1024,7 +1024,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"partial_variables": partial_vars,
|
||||
**kwargs,
|
||||
}
|
||||
cast(type[ChatPromptTemplate], super()).__init__(messages=_messages, **kwargs)
|
||||
cast("type[ChatPromptTemplate]", super()).__init__(messages=_messages, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
@ -1382,11 +1382,11 @@ def _create_template_from_message_type(
|
||||
)
|
||||
elif message_type in ("ai", "assistant"):
|
||||
message = AIMessagePromptTemplate.from_template(
|
||||
cast(str, template), template_format=template_format
|
||||
cast("str", template), template_format=template_format
|
||||
)
|
||||
elif message_type == "system":
|
||||
message = SystemMessagePromptTemplate.from_template(
|
||||
cast(str, template), template_format=template_format
|
||||
cast("str", template), template_format=template_format
|
||||
)
|
||||
elif message_type == "placeholder":
|
||||
if isinstance(template, str):
|
||||
@ -1484,7 +1484,7 @@ def _convert_to_message(
|
||||
else:
|
||||
_message = message_type_str(
|
||||
prompt=PromptTemplate.from_template(
|
||||
cast(str, template), template_format=template_format
|
||||
cast("str", template), template_format=template_format
|
||||
)
|
||||
)
|
||||
else:
|
||||
|
@ -782,10 +782,10 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
|
||||
# If there's only one input, don't bother with the executor
|
||||
if len(inputs) == 1:
|
||||
return cast(list[Output], [invoke(inputs[0], configs[0])])
|
||||
return cast("list[Output]", [invoke(inputs[0], configs[0])])
|
||||
|
||||
with get_executor_for_config(configs[0]) as executor:
|
||||
return cast(list[Output], list(executor.map(invoke, inputs, configs)))
|
||||
return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
|
||||
|
||||
@overload
|
||||
def batch_as_completed(
|
||||
@ -1532,7 +1532,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
return RunnableBinding(
|
||||
bound=self,
|
||||
config=cast(
|
||||
RunnableConfig,
|
||||
"RunnableConfig",
|
||||
{**(config or {}), **kwargs},
|
||||
), # type: ignore[misc]
|
||||
kwargs={},
|
||||
@ -1921,7 +1921,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
child_config = patch_config(config, callbacks=run_manager.get_child())
|
||||
with set_config_context(child_config) as context:
|
||||
output = cast(
|
||||
Output,
|
||||
"Output",
|
||||
context.run(
|
||||
call_func_with_variable_args, # type: ignore[arg-type]
|
||||
func, # type: ignore[arg-type]
|
||||
@ -2036,7 +2036,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
for run_manager in run_managers:
|
||||
run_manager.on_chain_error(e)
|
||||
if return_exceptions:
|
||||
return cast(list[Output], [e for _ in input])
|
||||
return cast("list[Output]", [e for _ in input])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -2048,7 +2048,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
else:
|
||||
run_manager.on_chain_end(out)
|
||||
if return_exceptions or first_exception is None:
|
||||
return cast(list[Output], output)
|
||||
return cast("list[Output]", output)
|
||||
else:
|
||||
raise first_exception
|
||||
|
||||
@ -2112,7 +2112,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
*(run_manager.on_chain_error(e) for run_manager in run_managers)
|
||||
)
|
||||
if return_exceptions:
|
||||
return cast(list[Output], [e for _ in input])
|
||||
return cast("list[Output]", [e for _ in input])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -2126,7 +2126,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
coros.append(run_manager.on_chain_end(out))
|
||||
await asyncio.gather(*coros)
|
||||
if return_exceptions or first_exception is None:
|
||||
return cast(list[Output], output)
|
||||
return cast("list[Output]", output)
|
||||
else:
|
||||
raise first_exception
|
||||
|
||||
@ -2183,7 +2183,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
iterator = context.run(transformer, input_for_transform, **kwargs) # type: ignore[arg-type]
|
||||
if stream_handler := next(
|
||||
(
|
||||
cast(_StreamingCallbackHandler, h)
|
||||
cast("_StreamingCallbackHandler", h)
|
||||
for h in run_manager.handlers
|
||||
# instance check OK here, it's a mixin
|
||||
if isinstance(h, _StreamingCallbackHandler) # type: ignore[misc]
|
||||
@ -2286,7 +2286,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
|
||||
if stream_handler := next(
|
||||
(
|
||||
cast(_StreamingCallbackHandler, h)
|
||||
cast("_StreamingCallbackHandler", h)
|
||||
for h in run_manager.handlers
|
||||
# instance check OK here, it's a mixin
|
||||
if isinstance(h, _StreamingCallbackHandler) # type: ignore[misc]
|
||||
@ -2307,7 +2307,7 @@ class Runnable(Generic[Input, Output], ABC):
|
||||
context=context,
|
||||
)
|
||||
else:
|
||||
chunk = cast(Output, await py_anext(iterator))
|
||||
chunk = cast("Output", await py_anext(iterator))
|
||||
yield chunk
|
||||
if final_output_supported:
|
||||
if final_output is None:
|
||||
@ -3029,7 +3029,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
raise
|
||||
else:
|
||||
run_manager.on_chain_end(input)
|
||||
return cast(Output, input)
|
||||
return cast("Output", input)
|
||||
|
||||
async def ainvoke(
|
||||
self,
|
||||
@ -3072,7 +3072,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
raise
|
||||
else:
|
||||
await run_manager.on_chain_end(input)
|
||||
return cast(Output, input)
|
||||
return cast("Output", input)
|
||||
|
||||
def batch(
|
||||
self,
|
||||
@ -3162,7 +3162,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
inputs = []
|
||||
for i in range(len(configs)):
|
||||
if i in failed_inputs_map:
|
||||
inputs.append(cast(Input, failed_inputs_map[i]))
|
||||
inputs.append(cast("Input", failed_inputs_map[i]))
|
||||
else:
|
||||
inputs.append(inputs_copy.pop(0))
|
||||
else:
|
||||
@ -3185,7 +3185,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
for rm in run_managers:
|
||||
rm.on_chain_error(e)
|
||||
if return_exceptions:
|
||||
return cast(list[Output], [e for _ in inputs])
|
||||
return cast("list[Output]", [e for _ in inputs])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -3197,7 +3197,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
else:
|
||||
run_manager.on_chain_end(out)
|
||||
if return_exceptions or first_exception is None:
|
||||
return cast(list[Output], inputs)
|
||||
return cast("list[Output]", inputs)
|
||||
else:
|
||||
raise first_exception
|
||||
|
||||
@ -3292,7 +3292,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
inputs = []
|
||||
for i in range(len(configs)):
|
||||
if i in failed_inputs_map:
|
||||
inputs.append(cast(Input, failed_inputs_map[i]))
|
||||
inputs.append(cast("Input", failed_inputs_map[i]))
|
||||
else:
|
||||
inputs.append(inputs_copy.pop(0))
|
||||
else:
|
||||
@ -3313,7 +3313,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
except BaseException as e:
|
||||
await asyncio.gather(*(rm.on_chain_error(e) for rm in run_managers))
|
||||
if return_exceptions:
|
||||
return cast(list[Output], [e for _ in inputs])
|
||||
return cast("list[Output]", [e for _ in inputs])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
@ -3327,7 +3327,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
coros.append(run_manager.on_chain_end(out))
|
||||
await asyncio.gather(*coros)
|
||||
if return_exceptions or first_exception is None:
|
||||
return cast(list[Output], inputs)
|
||||
return cast("list[Output]", inputs)
|
||||
else:
|
||||
raise first_exception
|
||||
|
||||
@ -3346,7 +3346,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
# transform the input stream of each step with the next
|
||||
# steps that don't natively support transforming an input stream will
|
||||
# buffer input in memory until all available, and then start emitting output
|
||||
final_pipeline = cast(Iterator[Output], input)
|
||||
final_pipeline = cast("Iterator[Output]", input)
|
||||
for idx, step in enumerate(steps):
|
||||
config = patch_config(
|
||||
config, callbacks=run_manager.get_child(f"seq:step:{idx + 1}")
|
||||
@ -3374,7 +3374,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
|
||||
# transform the input stream of each step with the next
|
||||
# steps that don't natively support transforming an input stream will
|
||||
# buffer input in memory until all available, and then start emitting output
|
||||
final_pipeline = cast(AsyncIterator[Output], input)
|
||||
final_pipeline = cast("AsyncIterator[Output]", input)
|
||||
for idx, step in enumerate(steps):
|
||||
config = patch_config(
|
||||
config,
|
||||
@ -4189,7 +4189,7 @@ class RunnableGenerator(Runnable[Input, Output]):
|
||||
final: Optional[Output] = None
|
||||
for output in self.stream(input, config, **kwargs):
|
||||
final = output if final is None else final + output # type: ignore[operator]
|
||||
return cast(Output, final)
|
||||
return cast("Output", final)
|
||||
|
||||
def atransform(
|
||||
self,
|
||||
@ -4222,7 +4222,7 @@ class RunnableGenerator(Runnable[Input, Output]):
|
||||
final: Optional[Output] = None
|
||||
async for output in self.astream(input, config, **kwargs):
|
||||
final = output if final is None else final + output # type: ignore[operator]
|
||||
return cast(Output, final)
|
||||
return cast("Output", final)
|
||||
|
||||
|
||||
class RunnableLambda(Runnable[Input, Output]):
|
||||
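The `RunnableGenerator.invoke`/`atransform` hunks above fold streamed chunks together with `+`. A small sketch of what that looks like from the outside (the function name and strings are mine):

```python
from collections.abc import Iterator

from langchain_core.runnables import RunnableGenerator


def shout(chunks: Iterator[str]) -> Iterator[str]:
    # A RunnableGenerator receives an iterator of input chunks and yields
    # output chunks, so it can transform a stream lazily.
    for chunk in chunks:
        for word in chunk.split():
            yield word.upper() + " "


shouter = RunnableGenerator(shout)
print(list(shouter.stream("stream me")))  # ['STREAM ', 'ME ']
print(shouter.invoke("stream me"))        # 'STREAM ME ' (chunks joined with +)
```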
@ -4336,7 +4336,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
self.afunc = func
|
||||
func_for_name = func
|
||||
elif callable(func):
|
||||
self.func = cast(Callable[[Input], Output], func)
|
||||
self.func = cast("Callable[[Input], Output]", func)
|
||||
func_for_name = func
|
||||
else:
|
||||
msg = (
|
||||
@ -4556,7 +4556,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
if inspect.isgeneratorfunction(self.func):
|
||||
output: Optional[Output] = None
|
||||
for chunk in call_func_with_variable_args(
|
||||
cast(Callable[[Input], Iterator[Output]], self.func),
|
||||
cast("Callable[[Input], Iterator[Output]]", self.func),
|
||||
input,
|
||||
config,
|
||||
run_manager,
|
||||
@ -4589,7 +4589,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
recursion_limit=recursion_limit - 1,
|
||||
),
|
||||
)
|
||||
return cast(Output, output)
|
||||
return cast("Output", output)
|
||||
|
||||
async def _ainvoke(
|
||||
self,
|
||||
@ -4611,7 +4611,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
) -> Output:
|
||||
output: Optional[Output] = None
|
||||
for chunk in call_func_with_variable_args(
|
||||
cast(Callable[[Input], Iterator[Output]], self.func),
|
||||
cast("Callable[[Input], Iterator[Output]]", self.func),
|
||||
input,
|
||||
config,
|
||||
run_manager.get_sync(),
|
||||
@ -4624,7 +4624,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
output = output + chunk # type: ignore[operator]
|
||||
except TypeError:
|
||||
output = chunk
|
||||
return cast(Output, output)
|
||||
return cast("Output", output)
|
||||
|
||||
else:
|
||||
|
||||
@ -4648,9 +4648,9 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
output: Optional[Output] = None
|
||||
async with aclosing(
|
||||
cast(
|
||||
AsyncGenerator[Any, Any],
|
||||
"AsyncGenerator[Any, Any]",
|
||||
acall_func_with_variable_args(
|
||||
cast(Callable, afunc),
|
||||
cast("Callable", afunc),
|
||||
input,
|
||||
config,
|
||||
run_manager,
|
||||
@ -4659,7 +4659,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
)
|
||||
) as stream:
|
||||
async for chunk in cast(
|
||||
AsyncIterator[Output],
|
||||
"AsyncIterator[Output]",
|
||||
stream,
|
||||
):
|
||||
if output is None:
|
||||
@ -4671,7 +4671,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
output = chunk
|
||||
else:
|
||||
output = await acall_func_with_variable_args(
|
||||
cast(Callable, afunc), input, config, run_manager, **kwargs
|
||||
cast("Callable", afunc), input, config, run_manager, **kwargs
|
||||
)
|
||||
# If the output is a Runnable, invoke it
|
||||
if isinstance(output, Runnable):
|
||||
@ -4689,7 +4689,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
recursion_limit=recursion_limit - 1,
|
||||
),
|
||||
)
|
||||
return cast(Output, output)
|
||||
return cast("Output", output)
|
||||
|
||||
def _config(
|
||||
self, config: Optional[RunnableConfig], callable: Callable[..., Any]
|
||||
@ -4779,7 +4779,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
if inspect.isgeneratorfunction(self.func):
|
||||
output: Optional[Output] = None
|
||||
for chunk in call_func_with_variable_args(
|
||||
self.func, cast(Input, final), config, run_manager, **kwargs
|
||||
self.func, cast("Input", final), config, run_manager, **kwargs
|
||||
):
|
||||
yield chunk
|
||||
if output is None:
|
||||
@ -4791,7 +4791,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
output = chunk
|
||||
else:
|
||||
output = call_func_with_variable_args(
|
||||
self.func, cast(Input, final), config, run_manager, **kwargs
|
||||
self.func, cast("Input", final), config, run_manager, **kwargs
|
||||
)
|
||||
|
||||
# If the output is a Runnable, use its stream output
|
||||
@ -4813,7 +4813,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
yield chunk
|
||||
elif not inspect.isgeneratorfunction(self.func):
|
||||
# Otherwise, just yield it
|
||||
yield cast(Output, output)
|
||||
yield cast("Output", output)
|
||||
|
||||
def transform(
|
||||
self,
|
||||
@ -4895,10 +4895,10 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
if is_async_generator(afunc):
|
||||
output: Optional[Output] = None
|
||||
async for chunk in cast(
|
||||
AsyncIterator[Output],
|
||||
"AsyncIterator[Output]",
|
||||
acall_func_with_variable_args(
|
||||
cast(Callable, afunc),
|
||||
cast(Input, final),
|
||||
cast("Callable", afunc),
|
||||
cast("Input", final),
|
||||
config,
|
||||
run_manager,
|
||||
**kwargs,
|
||||
@ -4914,7 +4914,11 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
output = chunk
|
||||
else:
|
||||
output = await acall_func_with_variable_args(
|
||||
cast(Callable, afunc), cast(Input, final), config, run_manager, **kwargs
|
||||
cast("Callable", afunc),
|
||||
cast("Input", final),
|
||||
config,
|
||||
run_manager,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# If the output is a Runnable, use its astream output
|
||||
@ -4936,7 +4940,7 @@ class RunnableLambda(Runnable[Input, Output]):
|
||||
yield chunk
|
||||
elif not is_async_generator(afunc):
|
||||
# Otherwise, just yield it
|
||||
yield cast(Output, output)
|
||||
yield cast("Output", output)
|
||||
|
||||
async def atransform(
|
||||
self,
|
||||
@ -5301,7 +5305,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
@override
|
||||
def InputType(self) -> type[Input]:
|
||||
return (
|
||||
cast(type[Input], self.custom_input_type)
|
||||
cast("type[Input]", self.custom_input_type)
|
||||
if self.custom_input_type is not None
|
||||
else self.bound.InputType
|
||||
)
|
||||
@ -5310,7 +5314,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
@override
|
||||
def OutputType(self) -> type[Output]:
|
||||
return (
|
||||
cast(type[Output], self.custom_output_type)
|
||||
cast("type[Output]", self.custom_output_type)
|
||||
if self.custom_output_type is not None
|
||||
else self.bound.OutputType
|
||||
)
|
||||
@ -5383,7 +5387,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
) -> list[Output]:
|
||||
if isinstance(config, list):
|
||||
configs = cast(
|
||||
list[RunnableConfig],
|
||||
"list[RunnableConfig]",
|
||||
[self._merge_configs(conf) for conf in config],
|
||||
)
|
||||
else:
|
||||
@ -5405,7 +5409,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
) -> list[Output]:
|
||||
if isinstance(config, list):
|
||||
configs = cast(
|
||||
list[RunnableConfig],
|
||||
"list[RunnableConfig]",
|
||||
[self._merge_configs(conf) for conf in config],
|
||||
)
|
||||
else:
|
||||
@ -5447,7 +5451,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
) -> Iterator[tuple[int, Union[Output, Exception]]]:
|
||||
if isinstance(config, Sequence):
|
||||
configs = cast(
|
||||
list[RunnableConfig],
|
||||
"list[RunnableConfig]",
|
||||
[self._merge_configs(conf) for conf in config],
|
||||
)
|
||||
else:
|
||||
@ -5498,7 +5502,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):
|
||||
) -> AsyncIterator[tuple[int, Union[Output, Exception]]]:
|
||||
if isinstance(config, Sequence):
|
||||
configs = cast(
|
||||
list[RunnableConfig],
|
||||
"list[RunnableConfig]",
|
||||
[self._merge_configs(conf) for conf in config],
|
||||
)
|
||||
else:
|
||||
@ -5665,7 +5669,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]):
|
||||
return self.__class__(
|
||||
bound=self.bound,
|
||||
kwargs=self.kwargs,
|
||||
config=cast(RunnableConfig, {**self.config, **(config or {}), **kwargs}),
|
||||
config=cast("RunnableConfig", {**self.config, **(config or {}), **kwargs}),
|
||||
custom_input_type=self.custom_input_type,
|
||||
custom_output_type=self.custom_output_type,
|
||||
)
|
||||
@ -5835,9 +5839,9 @@ def coerce_to_runnable(thing: RunnableLike) -> Runnable[Input, Output]:
|
||||
elif is_async_generator(thing) or inspect.isgeneratorfunction(thing):
|
||||
return RunnableGenerator(thing)
|
||||
elif callable(thing):
|
||||
return RunnableLambda(cast(Callable[[Input], Output], thing))
|
||||
return RunnableLambda(cast("Callable[[Input], Output]", thing))
|
||||
elif isinstance(thing, dict):
|
||||
return cast(Runnable[Input, Output], RunnableParallel(thing))
|
||||
return cast("Runnable[Input, Output]", RunnableParallel(thing))
|
||||
else:
|
||||
msg = (
|
||||
f"Expected a Runnable, callable or dict."
|
||||
|
@ -105,7 +105,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
raise TypeError(msg)
|
||||
|
||||
default_ = cast(
|
||||
Runnable[Input, Output], coerce_to_runnable(cast(RunnableLike, default))
|
||||
"Runnable[Input, Output]", coerce_to_runnable(cast("RunnableLike", default))
|
||||
)
|
||||
|
||||
_branches = []
|
||||
@ -125,7 +125,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
)
|
||||
raise ValueError(msg)
|
||||
condition, runnable = branch
|
||||
condition = cast(Runnable[Input, bool], coerce_to_runnable(condition))
|
||||
condition = cast("Runnable[Input, bool]", coerce_to_runnable(condition))
|
||||
runnable = coerce_to_runnable(runnable)
|
||||
_branches.append((condition, runnable))
|
||||
|
||||
|
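`RunnableBranch` (above) coerces each `(condition, runnable)` pair and the trailing default with `coerce_to_runnable`, so plain callables are accepted. A brief sketch with toy branches of my own:

```python
from langchain_core.runnables import RunnableBranch

branch = RunnableBranch(
    (lambda x: isinstance(x, int), lambda x: x * 2),  # (condition, runnable)
    (lambda x: isinstance(x, str), lambda x: x.upper()),
    lambda x: x,  # default branch, also coerced to a Runnable
)

print(branch.invoke(3))     # 6
print(branch.invoke("hi"))  # HI
```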
@ -200,7 +200,7 @@ def ensure_config(config: Optional[RunnableConfig] = None) -> RunnableConfig:
|
||||
if var_config := var_child_runnable_config.get():
|
||||
empty.update(
|
||||
cast(
|
||||
RunnableConfig,
|
||||
"RunnableConfig",
|
||||
{
|
||||
k: v.copy() if k in COPIABLE_KEYS else v # type: ignore[attr-defined]
|
||||
for k, v in var_config.items()
|
||||
@ -211,7 +211,7 @@ def ensure_config(config: Optional[RunnableConfig] = None) -> RunnableConfig:
|
||||
if config is not None:
|
||||
empty.update(
|
||||
cast(
|
||||
RunnableConfig,
|
||||
"RunnableConfig",
|
||||
{
|
||||
k: v.copy() if k in COPIABLE_KEYS else v # type: ignore[attr-defined]
|
||||
for k, v in config.items()
|
||||
@ -271,7 +271,7 @@ def get_config_list(
|
||||
stacklevel=3,
|
||||
)
|
||||
subsequent = cast(
|
||||
RunnableConfig, {k: v for k, v in config.items() if k != "run_id"}
|
||||
"RunnableConfig", {k: v for k, v in config.items() if k != "run_id"}
|
||||
)
|
||||
return [
|
||||
ensure_config(subsequent) if i else ensure_config(config)
|
||||
@ -533,7 +533,7 @@ class ContextThreadPoolExecutor(ThreadPoolExecutor):
|
||||
Future[T]: The future for the function.
|
||||
"""
|
||||
return super().submit(
|
||||
cast(Callable[..., T], partial(copy_context().run, func, *args, **kwargs))
|
||||
cast("Callable[..., T]", partial(copy_context().run, func, *args, **kwargs))
|
||||
)
|
||||
|
||||
def map(
|
||||
@ -621,7 +621,7 @@ async def run_in_executor(
|
||||
# Use default executor with context copied from current context
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
cast(Callable[..., T], partial(copy_context().run, wrapper)),
|
||||
cast("Callable[..., T]", partial(copy_context().run, wrapper)),
|
||||
)
|
||||
|
||||
return await asyncio.get_running_loop().run_in_executor(executor_or_config, wrapper)
|
||||
|
@ -122,7 +122,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
runnable: Runnable[Input, Output] = self
|
||||
while isinstance(runnable, DynamicRunnable):
|
||||
runnable, config = runnable._prepare(merge_configs(runnable.config, config))
|
||||
return runnable, cast(RunnableConfig, config)
|
||||
return runnable, cast("RunnableConfig", config)
|
||||
|
||||
@abstractmethod
|
||||
def _prepare(
|
||||
@ -178,10 +178,10 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
|
||||
# If there's only one input, don't bother with the executor
|
||||
if len(inputs) == 1:
|
||||
return cast(list[Output], [invoke(prepared[0], inputs[0])])
|
||||
return cast("list[Output]", [invoke(prepared[0], inputs[0])])
|
||||
|
||||
with get_executor_for_config(configs[0]) as executor:
|
||||
return cast(list[Output], list(executor.map(invoke, prepared, inputs)))
|
||||
return cast("list[Output]", list(executor.map(invoke, prepared, inputs)))
|
||||
|
||||
async def abatch(
|
||||
self,
|
||||
@ -271,7 +271,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
and "configurable" in arg
|
||||
and isinstance(arg["configurable"], dict)
|
||||
):
|
||||
runnable, config = self.prepare(cast(RunnableConfig, arg))
|
||||
runnable, config = self.prepare(cast("RunnableConfig", arg))
|
||||
kwargs = {**kwargs, "config": config}
|
||||
return getattr(runnable, name)(*args, **kwargs)
|
||||
|
||||
@ -281,7 +281,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
and "configurable" in arg
|
||||
and isinstance(arg["configurable"], dict)
|
||||
):
|
||||
runnable, config = self.prepare(cast(RunnableConfig, arg))
|
||||
runnable, config = self.prepare(cast("RunnableConfig", arg))
|
||||
argsl = list(args)
|
||||
argsl[idx] = config
|
||||
return getattr(runnable, name)(*argsl, **kwargs)
|
||||
@ -563,7 +563,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
for v in list(self.alternatives.keys()) + [self.default_key]
|
||||
),
|
||||
)
|
||||
_enums_for_spec[self.which] = cast(type[StrEnum], which_enum)
|
||||
_enums_for_spec[self.which] = cast("type[StrEnum]", which_enum)
|
||||
return get_unique_config_specs(
|
||||
# which alternative
|
||||
[
|
||||
@ -617,7 +617,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
# remap configurable keys for the chosen alternative
|
||||
if self.prefix_keys:
|
||||
config = cast(
|
||||
RunnableConfig,
|
||||
"RunnableConfig",
|
||||
{
|
||||
**config,
|
||||
"configurable": {
|
||||
@ -696,7 +696,7 @@ def make_options_spec(
|
||||
spec.name or spec.id,
|
||||
((v, v) for v in list(spec.options.keys())),
|
||||
)
|
||||
_enums_for_spec[spec] = cast(type[StrEnum], enum)
|
||||
_enums_for_spec[spec] = cast("type[StrEnum]", enum)
|
||||
if isinstance(spec, ConfigurableFieldSingleOption):
|
||||
return ConfigurableFieldSpec(
|
||||
id=spec.id,
|
||||
|
@ -317,12 +317,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
if not return_exceptions:
|
||||
first_to_raise = first_to_raise or output
|
||||
else:
|
||||
handled_exceptions[i] = cast(BaseException, output)
|
||||
handled_exceptions[i] = cast("BaseException", output)
|
||||
run_again.pop(i)
|
||||
elif isinstance(output, self.exceptions_to_handle):
|
||||
if self.exception_key:
|
||||
input[self.exception_key] = output # type: ignore
|
||||
handled_exceptions[i] = cast(BaseException, output)
|
||||
handled_exceptions[i] = cast("BaseException", output)
|
||||
else:
|
||||
run_managers[i].on_chain_end(output)
|
||||
to_return[i] = output
|
||||
@ -413,12 +413,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
if not return_exceptions:
|
||||
first_to_raise = first_to_raise or output
|
||||
else:
|
||||
handled_exceptions[i] = cast(BaseException, output)
|
||||
handled_exceptions[i] = cast("BaseException", output)
|
||||
run_again.pop(i)
|
||||
elif isinstance(output, self.exceptions_to_handle):
|
||||
if self.exception_key:
|
||||
input[self.exception_key] = output # type: ignore
|
||||
handled_exceptions[i] = cast(BaseException, output)
|
||||
handled_exceptions[i] = cast("BaseException", output)
|
||||
else:
|
||||
to_return[i] = output
|
||||
await run_managers[i].on_chain_end(output)
|
||||
@ -547,7 +547,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
context=context,
|
||||
)
|
||||
else:
|
||||
chunk = cast(Output, await py_anext(stream))
|
||||
chunk = cast("Output", await py_anext(stream))
|
||||
except self.exceptions_to_handle as e:
|
||||
first_error = e if first_error is None else first_error
|
||||
last_error = e
|
||||
|
@ -1,15 +0,0 @@
|
||||
# from langchain_core.runnables.base import RunnableBinding
|
||||
|
||||
|
||||
# class RunnableLearnable(RunnableBinding):
|
||||
# def __init__(self, *args, **kwargs):
|
||||
# super().__init__(*args, **kwargs)
|
||||
# self.parameters = []
|
||||
|
||||
# def backward(self):
|
||||
# for param in self.parameters:
|
||||
# param.backward()
|
||||
|
||||
# def update(self, optimizer):
|
||||
# for param in self.parameters:
|
||||
# optimizer.update(param)
|
@ -565,7 +565,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
|
||||
if filtered:
|
||||
yield filtered
|
||||
# yield map output
|
||||
yield cast(dict[str, Any], first_map_chunk_future.result())
|
||||
yield cast("dict[str, Any]", first_map_chunk_future.result())
|
||||
for chunk in map_output:
|
||||
yield chunk
|
||||
|
||||
|
@ -245,7 +245,7 @@ class RunnableRetry(RunnableBindingBase[Input, Output]):
|
||||
attempt.retry_state.set_result(result)
|
||||
except RetryError as e:
|
||||
if result is not_set:
|
||||
result = cast(list[Output], [e] * len(inputs))
|
||||
result = cast("list[Output]", [e] * len(inputs))
|
||||
|
||||
outputs: list[Union[Output, Exception]] = []
|
||||
for idx in range(len(inputs)):
|
||||
@ -311,7 +311,7 @@ class RunnableRetry(RunnableBindingBase[Input, Output]):
|
||||
attempt.retry_state.set_result(result)
|
||||
except RetryError as e:
|
||||
if result is not_set:
|
||||
result = cast(list[Output], [e] * len(inputs))
|
||||
result = cast("list[Output]", [e] * len(inputs))
|
||||
|
||||
outputs: list[Union[Output, Exception]] = []
|
||||
for idx in range(len(inputs)):
|
||||
|
@ -158,7 +158,7 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]):
|
||||
configs = get_config_list(config, len(inputs))
|
||||
with get_executor_for_config(configs[0]) as executor:
|
||||
return cast(
|
||||
list[Output],
|
||||
"list[Output]",
|
||||
list(executor.map(invoke, runnables, actual_inputs, configs)),
|
||||
)
|
||||
|
||||
|
@ -941,11 +941,11 @@ def _prep_run_args(
|
||||
) -> tuple[Union[str, dict], dict]:
|
||||
config = ensure_config(config)
|
||||
if _is_tool_call(input):
|
||||
tool_call_id: Optional[str] = cast(ToolCall, input)["id"]
|
||||
tool_input: Union[str, dict] = cast(ToolCall, input)["args"].copy()
|
||||
tool_call_id: Optional[str] = cast("ToolCall", input)["id"]
|
||||
tool_input: Union[str, dict] = cast("ToolCall", input)["args"].copy()
|
||||
else:
|
||||
tool_call_id = None
|
||||
tool_input = cast(Union[str, dict], input)
|
||||
tool_input = cast("Union[str, dict]", input)
|
||||
return (
|
||||
tool_input,
|
||||
dict(
|
||||
|
@ -128,9 +128,7 @@ def _get_trace_callbacks(
|
||||
example_id=example_id,
|
||||
)
|
||||
if callback_manager is None:
|
||||
from langchain_core.callbacks.base import Callbacks
|
||||
|
||||
cb = cast(Callbacks, [tracer])
|
||||
cb = cast("Callbacks", [tracer])
|
||||
else:
|
||||
if not any(
|
||||
isinstance(handler, LangChainTracer)
|
||||
@ -206,13 +204,12 @@ def register_configure_hook(
|
||||
if env_var is not None and handle_class is None:
|
||||
msg = "If env_var is set, handle_class must also be set to a non-None value."
|
||||
raise ValueError(msg)
|
||||
from langchain_core.callbacks.base import BaseCallbackHandler
|
||||
|
||||
_configure_hooks.append(
|
||||
(
|
||||
# the typings of ContextVar do not have the generic arg set as covariant
|
||||
# so we have to cast it
|
||||
cast(ContextVar[Optional[BaseCallbackHandler]], context_var),
|
||||
cast("ContextVar[Optional[BaseCallbackHandler]]", context_var),
|
||||
inheritable,
|
||||
handle_class,
|
||||
env_var,
|
||||
|
@ -285,7 +285,7 @@ class _TracerCore(ABC):
|
||||
output_generation = llm_run.outputs["generations"][i][j]
|
||||
if "message" in output_generation:
|
||||
output_generation["message"] = dumpd(
|
||||
cast(ChatGeneration, generation).message
|
||||
cast("ChatGeneration", generation).message
|
||||
)
|
||||
llm_run.end_time = datetime.now(timezone.utc)
|
||||
llm_run.events.append({"name": "end", "time": llm_run.end_time})
|
||||
|
@ -29,7 +29,6 @@ _TRACERS: weakref.WeakSet[EvaluatorCallbackHandler] = weakref.WeakSet()
|
||||
|
||||
def wait_for_all_evaluators() -> None:
|
||||
"""Wait for all tracers to finish."""
|
||||
global _TRACERS
|
||||
for tracer in list(_TRACERS):
|
||||
if tracer is not None:
|
||||
tracer.wait_for_futures()
|
||||
@ -91,7 +90,7 @@ class EvaluatorCallbackHandler(BaseTracer):
|
||||
self.executor = ThreadPoolExecutor(max_workers=max_concurrency)
|
||||
weakref.finalize(
|
||||
self,
|
||||
lambda: cast(ThreadPoolExecutor, self.executor).shutdown(wait=True),
|
||||
lambda: cast("ThreadPoolExecutor", self.executor).shutdown(wait=True),
|
||||
)
|
||||
else:
|
||||
self.executor = None
|
||||
@ -100,7 +99,6 @@ class EvaluatorCallbackHandler(BaseTracer):
|
||||
self.project_name = project_name
|
||||
self.logged_eval_results: dict[tuple[str, str], list[EvaluationResult]] = {}
|
||||
self.lock = threading.Lock()
|
||||
global _TRACERS
|
||||
_TRACERS.add(self)
|
||||
|
||||
def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None:
|
||||
@ -158,7 +156,7 @@ class EvaluatorCallbackHandler(BaseTracer):
|
||||
if isinstance(results, EvaluationResult):
|
||||
results_ = [results]
|
||||
elif isinstance(results, dict) and "results" in results:
|
||||
results_ = cast(list[EvaluationResult], results["results"])
|
||||
results_ = cast("list[EvaluationResult]", results["results"])
|
||||
else:
|
||||
msg = (
|
||||
f"Invalid evaluation result type {type(results)}."
|
||||
|
@ -185,7 +185,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
run_info = self.run_map.get(run_id)
|
||||
if run_info is None:
|
||||
# run has finished, don't issue any stream events
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
return
|
||||
if tap is sentinel:
|
||||
# if we are the first to tap, issue stream events
|
||||
@ -199,7 +199,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
"parent_ids": self._get_parent_ids(run_id),
|
||||
}
|
||||
self._send({**event, "data": {"chunk": first}}, run_info["run_type"])
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
# consume the rest of the output
|
||||
async for chunk in output:
|
||||
self._send(
|
||||
@ -209,7 +209,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
yield chunk
|
||||
else:
|
||||
# otherwise just pass through
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
# consume the rest of the output
|
||||
async for chunk in output:
|
||||
yield chunk
|
||||
@ -235,7 +235,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
run_info = self.run_map.get(run_id)
|
||||
if run_info is None:
|
||||
# run has finished, don't issue any stream events
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
return
|
||||
if tap is sentinel:
|
||||
# if we are the first to tap, issue stream events
|
||||
@ -249,7 +249,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
"parent_ids": self._get_parent_ids(run_id),
|
||||
}
|
||||
self._send({**event, "data": {"chunk": first}}, run_info["run_type"])
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
# consume the rest of the output
|
||||
for chunk in output:
|
||||
self._send(
|
||||
@ -259,7 +259,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
yield chunk
|
||||
else:
|
||||
# otherwise just pass through
|
||||
yield cast(T, first)
|
||||
yield cast("T", first)
|
||||
# consume the rest of the output
|
||||
for chunk in output:
|
||||
yield chunk
|
||||
@ -423,14 +423,14 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
if chunk is None:
|
||||
chunk_ = AIMessageChunk(content=token)
|
||||
else:
|
||||
chunk_ = cast(ChatGenerationChunk, chunk).message
|
||||
chunk_ = cast("ChatGenerationChunk", chunk).message
|
||||
|
||||
elif run_info["run_type"] == "llm":
|
||||
event = "on_llm_stream"
|
||||
if chunk is None:
|
||||
chunk_ = GenerationChunk(text=token)
|
||||
else:
|
||||
chunk_ = cast(GenerationChunk, chunk)
|
||||
chunk_ = cast("GenerationChunk", chunk)
|
||||
else:
|
||||
msg = f"Unexpected run type: {run_info['run_type']}"
|
||||
raise ValueError(msg)
|
||||
@ -461,7 +461,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
output: Union[dict, BaseMessage] = {}
|
||||
|
||||
if run_info["run_type"] == "chat_model":
|
||||
generations = cast(list[list[ChatGenerationChunk]], response.generations)
|
||||
generations = cast("list[list[ChatGenerationChunk]]", response.generations)
|
||||
for gen in generations:
|
||||
if output != {}:
|
||||
break
|
||||
@ -471,7 +471,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
|
||||
event = "on_chat_model_end"
|
||||
elif run_info["run_type"] == "llm":
|
||||
generations = cast(list[list[GenerationChunk]], response.generations)
|
||||
generations = cast("list[list[GenerationChunk]]", response.generations)
|
||||
output = {
|
||||
"generations": [
|
||||
[
|
||||
@ -942,7 +942,7 @@ async def _astream_events_implementation_v2(
|
||||
|
||||
# Assign the stream handler to the config
|
||||
config = ensure_config(config)
|
||||
run_id = cast(UUID, config.setdefault("run_id", uuid4()))
|
||||
run_id = cast("UUID", config.setdefault("run_id", uuid4()))
|
||||
callbacks = config.get("callbacks")
|
||||
if callbacks is None:
|
||||
config["callbacks"] = [event_streamer]
|
||||
|
@ -41,7 +41,6 @@ def log_error_once(method: str, exception: Exception) -> None:
|
||||
method: The method that raised the exception.
|
||||
exception: The exception that was raised.
|
||||
"""
|
||||
global _LOGGED
|
||||
if (method, type(exception)) in _LOGGED:
|
||||
return
|
||||
_LOGGED.add((method, type(exception)))
|
||||
@ -61,7 +60,7 @@ def get_client() -> Client:
|
||||
|
||||
def _get_executor() -> ThreadPoolExecutor:
|
||||
"""Get the executor."""
|
||||
global _EXECUTOR
|
||||
global _EXECUTOR # noqa: PLW0603
|
||||
if _EXECUTOR is None:
|
||||
_EXECUTOR = ThreadPoolExecutor()
|
||||
return _EXECUTOR
|
||||
|
@ -96,9 +96,12 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list]
|
||||
if to_merge:
|
||||
# TODO: Remove this once merge_dict is updated with special
|
||||
# handling for 'type'.
|
||||
if "type" in e:
|
||||
e = {k: v for k, v in e.items() if k != "type"}
|
||||
merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], e)
|
||||
new_e = (
|
||||
{k: v for k, v in e.items() if k != "type"}
|
||||
if "type" in e
|
||||
else e
|
||||
)
|
||||
merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], new_e)
|
||||
else:
|
||||
merged.append(e)
|
||||
else:
|
||||
|
@ -55,7 +55,7 @@ def py_anext(
|
||||
"""
|
||||
try:
|
||||
__anext__ = cast(
|
||||
Callable[[AsyncIterator[T]], Awaitable[T]], type(iterator).__anext__
|
||||
"Callable[[AsyncIterator[T]], Awaitable[T]]", type(iterator).__anext__
|
||||
)
|
||||
except AttributeError as e:
|
||||
msg = f"{iterator!r} is not an async iterator"
|
||||
|
@ -62,17 +62,36 @@ class ToolDescription(TypedDict):
|
||||
|
||||
|
||||
def _rm_titles(kv: dict, prev_key: str = "") -> dict:
|
||||
"""Recursively removes "title" fields from a JSON schema dictionary.
|
||||
|
||||
Remove "title" fields from the input JSON schema dictionary,
|
||||
except when a "title" appears within a property definition under "properties".
|
||||
|
||||
Args:
|
||||
kv (dict): The input JSON schema as a dictionary.
|
||||
prev_key (str): The key from the parent dictionary, used to identify context.
|
||||
|
||||
Returns:
|
||||
dict: A new dictionary with appropriate "title" fields removed.
|
||||
"""
|
||||
new_kv = {}
|
||||
|
||||
for k, v in kv.items():
|
||||
if k == "title":
|
||||
if isinstance(v, dict) and prev_key == "properties" and "title" in v:
|
||||
# If the value is a nested dict and part of a property under "properties",
|
||||
# preserve the title but continue recursion
|
||||
if isinstance(v, dict) and prev_key == "properties":
|
||||
new_kv[k] = _rm_titles(v, k)
|
||||
else:
|
||||
# Otherwise, remove this "title" key
|
||||
continue
|
||||
elif isinstance(v, dict):
|
||||
# Recurse into nested dictionaries
|
||||
new_kv[k] = _rm_titles(v, k)
|
||||
else:
|
||||
# Leave non-dict values untouched
|
||||
new_kv[k] = v
|
||||
|
||||
return new_kv
|
||||
|
||||
|
||||
@ -225,10 +244,9 @@ convert_python_function_to_openai_function = deprecated(
|
||||
|
||||
def _convert_typed_dict_to_openai_function(typed_dict: type) -> FunctionDescription:
|
||||
visited: dict = {}
|
||||
from pydantic.v1 import BaseModel
|
||||
|
||||
model = cast(
|
||||
type[BaseModel],
|
||||
"type[BaseModel]",
|
||||
_convert_any_typed_dicts_to_pydantic(typed_dict, visited=visited),
|
||||
)
|
||||
return _convert_pydantic_to_openai_function(model) # type: ignore
|
||||
@ -452,15 +470,17 @@ def convert_to_openai_function(
|
||||
if function_copy and "properties" in function_copy:
|
||||
oai_function["parameters"] = function_copy
|
||||
elif isinstance(function, type) and is_basemodel_subclass(function):
|
||||
oai_function = cast(dict, _convert_pydantic_to_openai_function(function))
|
||||
oai_function = cast("dict", _convert_pydantic_to_openai_function(function))
|
||||
elif is_typeddict(function):
|
||||
oai_function = cast(
|
||||
dict, _convert_typed_dict_to_openai_function(cast(type, function))
|
||||
"dict", _convert_typed_dict_to_openai_function(cast("type", function))
|
||||
)
|
||||
elif isinstance(function, BaseTool):
|
||||
oai_function = cast(dict, _format_tool_to_openai_function(function))
|
||||
oai_function = cast("dict", _format_tool_to_openai_function(function))
|
||||
elif callable(function):
|
||||
oai_function = cast(dict, _convert_python_function_to_openai_function(function))
|
||||
oai_function = cast(
|
||||
"dict", _convert_python_function_to_openai_function(function)
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
f"Unsupported function\n\n{function}\n\nFunctions must be passed in"
|
||||
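`convert_to_openai_function` (above) accepts Pydantic models, TypedDicts, `BaseTool`s and plain callables, and `_rm_titles` strips the `title` keys Pydantic adds to its JSON schema. A rough sketch; the model and field names are invented and the exact schema layout is abbreviated:

```python
from pydantic import BaseModel, Field

from langchain_core.utils.function_calling import convert_to_openai_function


class Multiply(BaseModel):
    """Multiply two integers."""

    a: int = Field(description="First factor")
    b: int = Field(description="Second factor")


spec = convert_to_openai_function(Multiply)
print(spec["name"])                              # Multiply
print(sorted(spec["parameters"]["properties"]))  # ['a', 'b']
# The nested "title" entries Pydantic would normally emit are removed.
```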
@ -756,7 +776,7 @@ def _py_38_safe_origin(origin: type) -> type:
|
||||
collections.abc.MutableMapping: typing.MutableMapping,
|
||||
**origin_union_type_map,
|
||||
}
|
||||
return cast(type, origin_map.get(origin, origin))
|
||||
return cast("type", origin_map.get(origin, origin))
|
||||
|
||||
|
||||
def _recursive_set_additional_properties_false(
|
||||
|
@ -64,11 +64,14 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
||||
|
||||
# Process each character in the string one at a time.
|
||||
for char in s:
|
||||
new_char = char
|
||||
if is_inside_string:
|
||||
if char == '"' and not escaped:
|
||||
is_inside_string = False
|
||||
elif char == "\n" and not escaped:
|
||||
char = "\\n" # Replace the newline character with the escape sequence.
|
||||
new_char = (
|
||||
"\\n" # Replace the newline character with the escape sequence.
|
||||
)
|
||||
elif char == "\\":
|
||||
escaped = not escaped
|
||||
else:
|
||||
@ -89,7 +92,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
||||
return None
|
||||
|
||||
# Append the processed character to the new string.
|
||||
new_chars.append(char)
|
||||
new_chars.append(new_char)
|
||||
|
||||
# If we're still inside a string at the end of processing,
|
||||
# we need to close the string.
|
||||
|
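`parse_partial_json` (patched above) repairs a truncated JSON fragment, such as a half-received chunk from a streaming model, by escaping stray newlines and closing any open strings, arrays and objects before handing the result to `json.loads`. A quick sketch with made-up fragments:

```python
from langchain_core.utils.json import parse_partial_json

print(parse_partial_json('{"setup": "Why did the chicken'))
# {'setup': 'Why did the chicken'}

print(parse_partial_json('{"answer": [1, 2'))
# {'answer': [1, 2]}
```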
@ -125,8 +125,6 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
|
||||
ChevronError: If the tag is unclosed.
|
||||
ChevronError: If the set delimiter tag is unclosed.
|
||||
"""
|
||||
global _CURRENT_LINE, _LAST_TAG_LINE
|
||||
|
||||
tag_types = {
|
||||
"!": "comment",
|
||||
"#": "section",
|
||||
@ -352,32 +350,33 @@ def _get_key(
|
||||
if scope in (0, False):
|
||||
return scope
|
||||
|
||||
resolved_scope = scope
|
||||
# For every dot separated key
|
||||
for child in key.split("."):
|
||||
# Return an empty string if falsy, with two exceptions
|
||||
# 0 should return 0, and False should return False
|
||||
if scope in (0, False):
|
||||
return scope
|
||||
if resolved_scope in (0, False):
|
||||
return resolved_scope
|
||||
# Move into the scope
|
||||
try:
|
||||
# Try subscripting (Normal dictionaries)
|
||||
scope = cast(dict[str, Any], scope)[child]
|
||||
resolved_scope = cast("dict[str, Any]", resolved_scope)[child]
|
||||
except (TypeError, AttributeError):
|
||||
try:
|
||||
scope = getattr(scope, child)
|
||||
resolved_scope = getattr(resolved_scope, child)
|
||||
except (TypeError, AttributeError):
|
||||
# Try as a list
|
||||
scope = scope[int(child)] # type: ignore
|
||||
resolved_scope = resolved_scope[int(child)] # type: ignore
|
||||
|
||||
try:
|
||||
# This allows for custom falsy data types
|
||||
# https://github.com/noahmorrison/chevron/issues/35
|
||||
if scope._CHEVRON_return_scope_when_falsy: # type: ignore
|
||||
return scope
|
||||
if resolved_scope._CHEVRON_return_scope_when_falsy: # type: ignore
|
||||
return resolved_scope
|
||||
except AttributeError:
|
||||
if scope in (0, False):
|
||||
return scope
|
||||
return scope or ""
|
||||
if resolved_scope in (0, False):
|
||||
return resolved_scope
|
||||
return resolved_scope or ""
|
||||
except (AttributeError, KeyError, IndexError, ValueError):
|
||||
# We couldn't find the key in the current scope
|
||||
# We'll try again on the next pass
|
||||
@ -619,7 +618,7 @@ def render(
|
||||
scope = _get_key(
|
||||
key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel
|
||||
)
|
||||
scopes.insert(0, cast(Literal[False], not scope))
|
||||
scopes.insert(0, cast("Literal[False]", not scope))
|
||||
|
||||
# If we're a partial
|
||||
elif tag == "partial":
|
||||
|
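The dotted-key lookup being renamed above is what powers mustache-style templates in `langchain_core`. A small sketch, assuming the documented `template_format="mustache"` option; the template and values are mine:

```python
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template(
    "Hello {{user.name}} from {{user.city}}!", template_format="mustache"
)
# Dotted keys such as user.name are resolved one segment at a time by _get_key.
print(prompt.format(user={"name": "Ada", "city": "London"}))
# Hello Ada from London!
```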
@ -473,7 +473,7 @@ def _create_root_model(
|
||||
except TypeError:
|
||||
pass
|
||||
custom_root_type = type(name, (RootModel,), base_class_attributes)
|
||||
return cast(type[BaseModel], custom_root_type)
|
||||
return cast("type[BaseModel]", custom_root_type)
|
||||
|
||||
|
||||
@lru_cache(maxsize=256)
|
||||
@ -598,7 +598,7 @@ def create_model_v2(
|
||||
Returns:
|
||||
Type[BaseModel]: The created model.
|
||||
"""
|
||||
field_definitions = cast(dict[str, Any], field_definitions or {}) # type: ignore[no-redef]
|
||||
field_definitions = cast("dict[str, Any]", field_definitions or {}) # type: ignore[no-redef]
|
||||
|
||||
if root:
|
||||
if field_definitions:
|
||||
|
@ -17,7 +17,7 @@ dependencies = [
|
||||
"pydantic<3.0.0,>=2.7.4; python_full_version >= \"3.12.4\"",
|
||||
]
|
||||
name = "langchain-core"
|
||||
version = "0.3.48"
|
||||
version = "0.3.49"
|
||||
description = "Building applications with LLMs through composability"
|
||||
readme = "README.md"
|
||||
|
||||
@ -28,7 +28,7 @@ repository = "https://github.com/langchain-ai/langchain"
|
||||
|
||||
[dependency-groups]
|
||||
lint = [
|
||||
"ruff<1.0.0,>=0.9.2",
|
||||
"ruff<0.12.0,>=0.11.2",
|
||||
]
|
||||
typing = [
|
||||
"mypy<1.11,>=1.10",
|
||||
@ -77,7 +77,7 @@ target-version = "py39"
|
||||
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [ "ANN", "ASYNC", "B", "C4", "COM", "DJ", "E", "EM", "EXE", "F", "FLY", "FURB", "I", "ICN", "INT", "LOG", "N", "NPY", "PD", "PIE", "PTH", "Q", "RSE", "S", "SIM", "SLOT", "T10", "T201", "TC", "TID", "TRY", "UP", "W", "YTT",]
|
||||
select = [ "ANN", "ASYNC", "B", "C4", "COM", "DJ", "E", "EM", "EXE", "F", "FLY", "FURB", "I", "ICN", "INT", "LOG", "N", "NPY", "PD", "PIE", "PLW", "PTH", "Q", "RSE", "S", "SIM", "SLOT", "T10", "T201", "TC", "TID", "TRY", "UP", "W", "YTT",]
|
||||
ignore = [ "ANN401", "COM812", "UP007", "S110", "S112", "TC001", "TC002", "TC003"]
|
||||
flake8-type-checking.runtime-evaluated-base-classes = ["pydantic.BaseModel","langchain_core.load.serializable.Serializable","langchain_core.runnables.base.RunnableSerializable"]
|
||||
flake8-annotations.allow-star-arg-any = true
|
||||
@ -96,6 +96,7 @@ filterwarnings = [ "ignore::langchain_core._api.beta_decorator.LangChainBetaWarn
|
||||
classmethod-decorators = [ "classmethod", "langchain_core.utils.pydantic.pre_init", "pydantic.field_validator", "pydantic.v1.root_validator",]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"langchain_core/utils/mustache.py" = [ "PLW0603",]
|
||||
"tests/unit_tests/prompts/test_chat.py" = [ "E501",]
|
||||
"tests/unit_tests/runnables/test_runnable.py" = [ "E501",]
|
||||
"tests/unit_tests/runnables/test_graph.py" = [ "E501",]
|
||||
|
@ -33,6 +33,8 @@ EXPECTED_ALL = [
|
||||
"FileCallbackHandler",
|
||||
"adispatch_custom_event",
|
||||
"dispatch_custom_event",
|
||||
"UsageMetadataCallbackHandler",
|
||||
"get_usage_metadata_callback",
|
||||
]
|
||||
|
||||
|
||||
|
libs/core/tests/unit_tests/callbacks/test_usage_callback.py (new file, 122 lines)
@ -0,0 +1,122 @@
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
UsageMetadataCallbackHandler,
|
||||
get_usage_metadata_callback,
|
||||
)
|
||||
from langchain_core.language_models import GenericFakeChatModel
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages.ai import (
|
||||
InputTokenDetails,
|
||||
OutputTokenDetails,
|
||||
UsageMetadata,
|
||||
add_usage,
|
||||
)
|
||||
from langchain_core.outputs import ChatResult
|
||||
|
||||
usage1 = UsageMetadata(
|
||||
input_tokens=1,
|
||||
output_tokens=2,
|
||||
total_tokens=3,
|
||||
)
|
||||
usage2 = UsageMetadata(
|
||||
input_tokens=4,
|
||||
output_tokens=5,
|
||||
total_tokens=9,
|
||||
)
|
||||
usage3 = UsageMetadata(
|
||||
input_tokens=10,
|
||||
output_tokens=20,
|
||||
total_tokens=30,
|
||||
input_token_details=InputTokenDetails(audio=5),
|
||||
output_token_details=OutputTokenDetails(reasoning=10),
|
||||
)
|
||||
usage4 = UsageMetadata(
|
||||
input_tokens=5,
|
||||
output_tokens=10,
|
||||
total_tokens=15,
|
||||
input_token_details=InputTokenDetails(audio=3),
|
||||
output_token_details=OutputTokenDetails(reasoning=5),
|
||||
)
|
||||
messages = [
|
||||
AIMessage("Response 1", usage_metadata=usage1),
|
||||
AIMessage("Response 2", usage_metadata=usage2),
|
||||
AIMessage("Response 3", usage_metadata=usage3),
|
||||
AIMessage("Response 4", usage_metadata=usage4),
|
||||
]
|
||||
|
||||
|
||||
class FakeChatModelWithResponseMetadata(GenericFakeChatModel):
|
||||
model_name: str
|
||||
|
||||
def _generate(self, *args: Any, **kwargs: Any) -> ChatResult:
|
||||
result = super()._generate(*args, **kwargs)
|
||||
result.generations[0].message.response_metadata = {
|
||||
"model_name": self.model_name
|
||||
}
|
||||
return result
|
||||
|
||||
|
||||
def test_usage_callback() -> None:
|
||||
llm = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages), model_name="test_model"
|
||||
)
|
||||
|
||||
# Test context manager
|
||||
with get_usage_metadata_callback() as cb:
|
||||
_ = llm.invoke("Message 1")
|
||||
_ = llm.invoke("Message 2")
|
||||
total_1_2 = add_usage(usage1, usage2)
|
||||
assert cb.usage_metadata == {"test_model": total_1_2}
|
||||
_ = llm.invoke("Message 3")
|
||||
_ = llm.invoke("Message 4")
|
||||
total_3_4 = add_usage(usage3, usage4)
|
||||
assert cb.usage_metadata == {"test_model": add_usage(total_1_2, total_3_4)}
|
||||
|
||||
# Test via config
|
||||
llm = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages[:2]), model_name="test_model"
|
||||
)
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
_ = llm.batch(["Message 1", "Message 2"], config={"callbacks": [callback]})
|
||||
assert callback.usage_metadata == {"test_model": total_1_2}
|
||||
|
||||
# Test multiple models
|
||||
llm_1 = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages[:2]), model_name="test_model_1"
|
||||
)
|
||||
llm_2 = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages[2:4]), model_name="test_model_2"
|
||||
)
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
_ = llm_1.batch(["Message 1", "Message 2"], config={"callbacks": [callback]})
|
||||
_ = llm_2.batch(["Message 3", "Message 4"], config={"callbacks": [callback]})
|
||||
assert callback.usage_metadata == {
|
||||
"test_model_1": total_1_2,
|
||||
"test_model_2": total_3_4,
|
||||
}
|
||||
|
||||
|
||||
async def test_usage_callback_async() -> None:
|
||||
llm = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages), model_name="test_model"
|
||||
)
|
||||
|
||||
# Test context manager
|
||||
with get_usage_metadata_callback() as cb:
|
||||
_ = await llm.ainvoke("Message 1")
|
||||
_ = await llm.ainvoke("Message 2")
|
||||
total_1_2 = add_usage(usage1, usage2)
|
||||
assert cb.usage_metadata == {"test_model": total_1_2}
|
||||
_ = await llm.ainvoke("Message 3")
|
||||
_ = await llm.ainvoke("Message 4")
|
||||
total_3_4 = add_usage(usage3, usage4)
|
||||
assert cb.usage_metadata == {"test_model": add_usage(total_1_2, total_3_4)}
|
||||
|
||||
# Test via config
|
||||
llm = FakeChatModelWithResponseMetadata(
|
||||
messages=iter(messages[:2]), model_name="test_model"
|
||||
)
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
_ = await llm.abatch(["Message 1", "Message 2"], config={"callbacks": [callback]})
|
||||
assert callback.usage_metadata == {"test_model": total_1_2}
|
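The new test file above exercises the usage-tracking callback API that this commit exports from `langchain_core.callbacks`. As a rough, hedged sketch (not part of the diff), the same pieces can be wired up outside of pytest; the fake model subclass below only exists to report a model name and mirrors the helper defined in the test:

```python
from typing import Any

from langchain_core.callbacks import get_usage_metadata_callback
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.outputs import ChatResult


class FakeModelWithName(GenericFakeChatModel):
    """Fake chat model that reports a model name, as in the test above."""

    model_name: str

    def _generate(self, *args: Any, **kwargs: Any) -> ChatResult:
        result = super()._generate(*args, **kwargs)
        result.generations[0].message.response_metadata = {"model_name": self.model_name}
        return result


msg = AIMessage(
    "Hello!",
    usage_metadata=UsageMetadata(input_tokens=2, output_tokens=3, total_tokens=5),
)
llm = FakeModelWithName(messages=iter([msg]), model_name="my-model")

with get_usage_metadata_callback() as cb:
    llm.invoke("Hi")

# Aggregated usage, keyed by the model name found in response_metadata.
print(cb.usage_metadata)
```

The context manager and an explicit `UsageMetadataCallbackHandler` passed via `config={"callbacks": [...]}` accumulate the same per-model totals, which is what the batch assertions in the test check.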
@ -127,7 +127,7 @@ def test_from_examples() -> None:
|
||||
assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"}
|
||||
|
||||
assert isinstance(selector.vectorstore, DummyVectorStore)
|
||||
vector_store = cast(DummyVectorStore, selector.vectorstore)
|
||||
vector_store = cast("DummyVectorStore", selector.vectorstore)
|
||||
assert vector_store.embeddings is embeddings
|
||||
assert vector_store.init_arg == "some_init_arg"
|
||||
assert vector_store.texts == ["bar"]
|
||||
@ -153,7 +153,7 @@ async def test_afrom_examples() -> None:
|
||||
assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"}
|
||||
|
||||
assert isinstance(selector.vectorstore, DummyVectorStore)
|
||||
vector_store = cast(DummyVectorStore, selector.vectorstore)
|
||||
vector_store = cast("DummyVectorStore", selector.vectorstore)
|
||||
assert vector_store.embeddings is embeddings
|
||||
assert vector_store.init_arg == "some_init_arg"
|
||||
assert vector_store.texts == ["bar"]
|
||||
@ -207,7 +207,7 @@ def test_mmr_from_examples() -> None:
|
||||
assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"}
|
||||
|
||||
assert isinstance(selector.vectorstore, DummyVectorStore)
|
||||
vector_store = cast(DummyVectorStore, selector.vectorstore)
|
||||
vector_store = cast("DummyVectorStore", selector.vectorstore)
|
||||
assert vector_store.embeddings is embeddings
|
||||
assert vector_store.init_arg == "some_init_arg"
|
||||
assert vector_store.texts == ["bar"]
|
||||
@ -235,7 +235,7 @@ async def test_mmr_afrom_examples() -> None:
|
||||
assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"}
|
||||
|
||||
assert isinstance(selector.vectorstore, DummyVectorStore)
|
||||
vector_store = cast(DummyVectorStore, selector.vectorstore)
|
||||
vector_store = cast("DummyVectorStore", selector.vectorstore)
|
||||
assert vector_store.embeddings is embeddings
|
||||
assert vector_store.init_arg == "some_init_arg"
|
||||
assert vector_store.texts == ["bar"]
|
||||
|
@ -822,6 +822,158 @@ async def test_ascoped_full_fails_with_bad_source_ids(
|
||||
)
|
||||
|
||||
|
||||
def test_index_empty_doc_scoped_full(
|
||||
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
|
||||
) -> None:
|
||||
"""Test Indexing with scoped_full strategy"""
|
||||
loader = ToyLoader(
|
||||
documents=[
|
||||
Document(
|
||||
page_content="This is a test document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is another document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is yet another document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is a test document from another source.",
|
||||
metadata={"source": "2"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
record_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp()
|
||||
):
|
||||
assert index(
|
||||
loader,
|
||||
record_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 4,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 0,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
with patch.object(
|
||||
record_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp()
|
||||
):
|
||||
assert index(
|
||||
loader,
|
||||
record_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 0,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 4,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
loader = ToyLoader(documents=[])
|
||||
|
||||
with patch.object(
|
||||
record_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp()
|
||||
):
|
||||
assert index(
|
||||
loader,
|
||||
record_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 0,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 0,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
|
||||
async def test_aindex_empty_doc_scoped_full(
|
||||
arecord_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
|
||||
) -> None:
|
||||
"""Test Indexing with scoped_full strategy."""
|
||||
loader = ToyLoader(
|
||||
documents=[
|
||||
Document(
|
||||
page_content="This is a test document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is another document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is yet another document.",
|
||||
metadata={"source": "1"},
|
||||
),
|
||||
Document(
|
||||
page_content="This is a test document from another source.",
|
||||
metadata={"source": "2"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
arecord_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp()
|
||||
):
|
||||
assert await aindex(
|
||||
loader,
|
||||
arecord_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 4,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 0,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
with patch.object(
|
||||
arecord_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp()
|
||||
):
|
||||
assert await aindex(
|
||||
loader,
|
||||
arecord_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 0,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 4,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
loader = ToyLoader(documents=[])
|
||||
|
||||
with patch.object(
|
||||
arecord_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp()
|
||||
):
|
||||
assert await aindex(
|
||||
loader,
|
||||
arecord_manager,
|
||||
vector_store,
|
||||
cleanup="scoped_full",
|
||||
source_id_key="source",
|
||||
) == {
|
||||
"num_added": 0,
|
||||
"num_deleted": 0,
|
||||
"num_skipped": 0,
|
||||
"num_updated": 0,
|
||||
}
|
||||
|
||||
|
||||
def test_no_delete(
|
||||
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
|
||||
) -> None:
|
||||
|
@ -101,7 +101,7 @@ def test_numbered_list() -> None:
|
||||
(text2, ["apple", "banana", "cherry"]),
|
||||
(text3, []),
|
||||
]:
|
||||
expectedlist = [[a] for a in cast(list[str], expected)]
|
||||
expectedlist = [[a] for a in cast("list[str]", expected)]
|
||||
assert parser.parse(text) == expected
|
||||
assert add(parser.transform(t for t in text)) == (expected or None)
|
||||
assert list(parser.transform(t for t in text)) == expectedlist
|
||||
@ -137,7 +137,7 @@ def test_markdown_list() -> None:
|
||||
(text2, ["apple", "banana", "cherry"]),
|
||||
(text3, []),
|
||||
]:
|
||||
expectedlist = [[a] for a in cast(list[str], expected)]
|
||||
expectedlist = [[a] for a in cast("list[str]", expected)]
|
||||
assert parser.parse(text) == expected
|
||||
assert add(parser.transform(t for t in text)) == (expected or None)
|
||||
assert list(parser.transform(t for t in text)) == expectedlist
|
||||
@ -240,7 +240,7 @@ async def test_numbered_list_async() -> None:
|
||||
(text2, ["apple", "banana", "cherry"]),
|
||||
(text3, []),
|
||||
]:
|
||||
expectedlist = [[a] for a in cast(list[str], expected)]
|
||||
expectedlist = [[a] for a in cast("list[str]", expected)]
|
||||
assert await parser.aparse(text) == expected
|
||||
assert await aadd(parser.atransform(aiter_from_iter(t for t in text))) == (
|
||||
expected or None
|
||||
@ -283,7 +283,7 @@ async def test_markdown_list_async() -> None:
|
||||
(text2, ["apple", "banana", "cherry"]),
|
||||
(text3, []),
|
||||
]:
|
||||
expectedlist = [[a] for a in cast(list[str], expected)]
|
||||
expectedlist = [[a] for a in cast("list[str]", expected)]
|
||||
assert await parser.aparse(text) == expected
|
||||
assert await aadd(parser.atransform(aiter_from_iter(t for t in text))) == (
|
||||
expected or None
|
||||
|
@ -929,7 +929,7 @@ async def test_chat_tmpl_serdes(snapshot: SnapshotAssertion) -> None:
|
||||
("system", [{"text": "You are an AI assistant named {name}."}]),
|
||||
SystemMessagePromptTemplate.from_template("you are {foo}"),
|
||||
cast(
|
||||
tuple,
|
||||
"tuple",
|
||||
(
|
||||
"human",
|
||||
[
|
||||
|
@ -19,7 +19,7 @@ def _fake_runnable(
|
||||
if isclass(schema) and is_basemodel_subclass(schema):
|
||||
return schema(name="yo", value=value)
|
||||
else:
|
||||
params = cast(dict, schema)["parameters"]
|
||||
params = cast("dict", schema)["parameters"]
|
||||
return {k: 1 if k != "value" else value for k, v in params.items()}
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@ def test_ensure_config() -> None:
|
||||
"tags": ["tag3", "tag4"],
|
||||
},
|
||||
)
|
||||
config = ctx.run(ensure_config, cast(RunnableConfig, arg))
|
||||
config = ctx.run(ensure_config, cast("RunnableConfig", arg))
|
||||
assert len(arg["callbacks"]) == 1, (
|
||||
"ensure_config should not modify the original config"
|
||||
)
|
||||
@ -147,7 +147,7 @@ async def test_merge_config_callbacks() -> None:
|
||||
def test_config_arbitrary_keys() -> None:
|
||||
base: RunnablePassthrough[Any] = RunnablePassthrough()
|
||||
bound = base.with_config(my_custom_key="my custom value")
|
||||
config = cast(RunnableBinding, bound).config
|
||||
config = cast("RunnableBinding", bound).config
|
||||
|
||||
assert config.get("my_custom_key") == "my custom value"
|
||||
|
||||
|
@ -2436,7 +2436,7 @@ async def test_stream_log_retriever() -> None:
|
||||
):
|
||||
del op["value"]["id"]
|
||||
|
||||
assert sorted(cast(RunLog, add(stream_log)).state["logs"]) == [
|
||||
assert sorted(cast("RunLog", add(stream_log)).state["logs"]) == [
|
||||
"ChatPromptTemplate",
|
||||
"FakeListLLM",
|
||||
"FakeListLLM:2",
|
||||
@ -2632,7 +2632,7 @@ def test_combining_sequences(
|
||||
lambda x: {"question": x[0] + x[1]}
|
||||
)
|
||||
|
||||
chain2 = cast(RunnableSequence, input_formatter | prompt2 | chat2 | parser2)
|
||||
chain2 = cast("RunnableSequence", input_formatter | prompt2 | chat2 | parser2)
|
||||
|
||||
assert isinstance(chain, RunnableSequence)
|
||||
assert chain2.first == input_formatter
|
||||
@ -2640,7 +2640,7 @@ def test_combining_sequences(
|
||||
assert chain2.last == parser2
|
||||
assert dumps(chain2, pretty=True) == snapshot
|
||||
|
||||
combined_chain = cast(RunnableSequence, chain | chain2)
|
||||
combined_chain = cast("RunnableSequence", chain | chain2)
|
||||
|
||||
assert combined_chain.first == prompt
|
||||
assert combined_chain.middle == [
|
||||
@ -3278,7 +3278,7 @@ async def test_map_astream() -> None:
|
||||
final_state = chunk
|
||||
else:
|
||||
final_state += chunk
|
||||
final_state = cast(RunLog, final_state)
|
||||
final_state = cast("RunLog", final_state)
|
||||
|
||||
assert final_state.state["final_output"] == final_value
|
||||
assert len(final_state.state["streamed_output"]) == len(streamed_chunks)
|
||||
@ -3312,7 +3312,7 @@ async def test_map_astream() -> None:
|
||||
final_state = chunk
|
||||
else:
|
||||
final_state += chunk
|
||||
final_state = cast(RunLog, final_state)
|
||||
final_state = cast("RunLog", final_state)
|
||||
|
||||
assert final_state.state["final_output"] == final_value
|
||||
assert len(final_state.state["streamed_output"]) == len(streamed_chunks)
|
||||
@ -3328,7 +3328,7 @@ async def test_map_astream() -> None:
|
||||
final_state = chunk
|
||||
else:
|
||||
final_state += chunk
|
||||
final_state = cast(RunLog, final_state)
|
||||
final_state = cast("RunLog", final_state)
|
||||
|
||||
assert final_state.state["final_output"] == final_value
|
||||
assert len(final_state.state["streamed_output"]) == len(streamed_chunks)
|
||||
@ -4032,7 +4032,7 @@ async def test_runnable_lambda_astream() -> None:
|
||||
output = [
|
||||
chunk
|
||||
async for chunk in cast(
|
||||
AsyncIterator[str], RunnableLambda(lambda x: llm).astream("")
|
||||
"AsyncIterator[str]", RunnableLambda(lambda x: llm).astream("")
|
||||
)
|
||||
]
|
||||
assert output == list(llm_res)
|
||||
@ -5350,7 +5350,7 @@ def test_default_transform_with_dicts() -> None:
|
||||
def invoke(
|
||||
self, input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any
|
||||
) -> Output:
|
||||
return cast(Output, input) # type: ignore
|
||||
return cast("Output", input) # type: ignore
|
||||
|
||||
runnable = CustomRunnable[dict[str, str], dict[str, str]]()
|
||||
chunks = iter(
|
||||
@ -5371,7 +5371,7 @@ async def test_default_atransform_with_dicts() -> None:
|
||||
def invoke(
|
||||
self, input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any
|
||||
) -> Output:
|
||||
return cast(Output, input)
|
||||
return cast("Output", input)
|
||||
|
||||
runnable = CustomRunnable[dict[str, str], dict[str, str]]()
|
||||
|
||||
|
@ -42,7 +42,7 @@ def _with_nulled_run_id(events: Sequence[StreamEvent]) -> list[StreamEvent]:
|
||||
assert "parent_ids" in event, "Parent ids should be present in the event."
|
||||
assert event["parent_ids"] == [], "Parent ids should be empty."
|
||||
|
||||
return cast(list[StreamEvent], [{**event, "run_id": ""} for event in events])
|
||||
return cast("list[StreamEvent]", [{**event, "run_id": ""} for event in events])
|
||||
|
||||
|
||||
async def _as_async_iterator(iterable: list) -> AsyncIterator:
|
||||
|
@ -68,7 +68,7 @@ def _with_nulled_run_id(events: Sequence[StreamEvent]) -> list[StreamEvent]:
|
||||
)
|
||||
|
||||
return cast(
|
||||
list[StreamEvent],
|
||||
"list[StreamEvent]",
|
||||
[{**event, "run_id": "", "parent_ids": []} for event in events],
|
||||
)
|
||||
|
||||
|
@ -22,7 +22,7 @@ def try_to_import(module_name: str) -> tuple[int, str]:
|
||||
getattr(module, cls_)
|
||||
|
||||
result = subprocess.run(
|
||||
["python", "-c", f"import langchain_core.{module_name}"],
|
||||
["python", "-c", f"import langchain_core.{module_name}"], check=True
|
||||
)
|
||||
return result.returncode, module_name
|
||||
|
||||
|
@ -60,7 +60,10 @@ from langchain_core.tools.base import (
|
||||
_is_message_content_type,
|
||||
get_all_basemodel_annotations,
|
||||
)
|
||||
from langchain_core.utils.function_calling import convert_to_openai_function
|
||||
from langchain_core.utils.function_calling import (
|
||||
convert_to_openai_function,
|
||||
convert_to_openai_tool,
|
||||
)
|
||||
from langchain_core.utils.pydantic import (
|
||||
PYDANTIC_MAJOR_VERSION,
|
||||
_create_subset_model,
|
||||
@ -200,7 +203,7 @@ def test_decorator_with_specified_schema() -> None:
|
||||
assert isinstance(tool_func, BaseTool)
|
||||
assert tool_func.args_schema == _MockSchema
|
||||
|
||||
@tool(args_schema=cast(ArgsSchema, _MockSchemaV1))
|
||||
@tool(args_schema=cast("ArgsSchema", _MockSchemaV1))
|
||||
def tool_func_v1(arg1: int, arg2: bool, arg3: Optional[dict] = None) -> str:
|
||||
return f"{arg1} {arg2} {arg3}"
|
||||
|
||||
@ -1932,7 +1935,7 @@ def test_structured_tool_with_different_pydantic_versions(pydantic_model: Any) -
|
||||
|
||||
assert foo_tool.invoke({"a": 5, "b": "hello"}) == "foo"
|
||||
|
||||
args_schema = cast(BaseModel, foo_tool.args_schema)
|
||||
args_schema = cast("BaseModel", foo_tool.args_schema)
|
||||
args_json_schema = (
|
||||
args_schema.model_json_schema()
|
||||
if hasattr(args_schema, "model_json_schema")
|
||||
@ -2481,7 +2484,7 @@ def test_tool_decorator_description() -> None:
|
||||
|
||||
assert foo.description == "Foo."
|
||||
assert (
|
||||
cast(BaseModel, foo.tool_call_schema).model_json_schema()["description"]
|
||||
cast("BaseModel", foo.tool_call_schema).model_json_schema()["description"]
|
||||
== "Foo."
|
||||
)
|
||||
|
||||
@ -2493,7 +2496,7 @@ def test_tool_decorator_description() -> None:
|
||||
|
||||
assert foo_description.description == "description"
|
||||
assert (
|
||||
cast(BaseModel, foo_description.tool_call_schema).model_json_schema()[
|
||||
cast("BaseModel", foo_description.tool_call_schema).model_json_schema()[
|
||||
"description"
|
||||
]
|
||||
== "description"
|
||||
@ -2511,7 +2514,7 @@ def test_tool_decorator_description() -> None:
|
||||
|
||||
assert foo_args_schema.description == "Bar."
|
||||
assert (
|
||||
cast(BaseModel, foo_args_schema.tool_call_schema).model_json_schema()[
|
||||
cast("BaseModel", foo_args_schema.tool_call_schema).model_json_schema()[
|
||||
"description"
|
||||
]
|
||||
== "Bar."
|
||||
@ -2524,7 +2527,7 @@ def test_tool_decorator_description() -> None:
|
||||
assert foo_args_schema_description.description == "description"
|
||||
assert (
|
||||
cast(
|
||||
BaseModel, foo_args_schema_description.tool_call_schema
|
||||
"BaseModel", foo_args_schema_description.tool_call_schema
|
||||
).model_json_schema()["description"]
|
||||
== "description"
|
||||
)
|
||||
@ -2549,14 +2552,55 @@ def test_tool_decorator_description() -> None:
|
||||
|
||||
assert foo_args_jsons_schema.description == "JSON Schema."
|
||||
assert (
|
||||
cast(dict, foo_args_jsons_schema.tool_call_schema)["description"]
|
||||
cast("dict", foo_args_jsons_schema.tool_call_schema)["description"]
|
||||
== "JSON Schema."
|
||||
)
|
||||
|
||||
assert foo_args_jsons_schema_with_description.description == "description"
|
||||
assert (
|
||||
cast(dict, foo_args_jsons_schema_with_description.tool_call_schema)[
|
||||
cast("dict", foo_args_jsons_schema_with_description.tool_call_schema)[
|
||||
"description"
|
||||
]
|
||||
== "description"
|
||||
)
|
||||
|
||||
|
||||
def test_title_property_preserved() -> None:
|
||||
"""Test that the title property is preserved when generating schema.
|
||||
|
||||
https://github.com/langchain-ai/langchain/issues/30456
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
schema_to_be_extracted = {
|
||||
"type": "object",
|
||||
"required": [],
|
||||
"properties": {
|
||||
"title": {"type": "string", "description": "item title"},
|
||||
"due_date": {"type": "string", "description": "item due date"},
|
||||
},
|
||||
"description": "foo",
|
||||
}
|
||||
|
||||
@tool(args_schema=schema_to_be_extracted)
|
||||
def extract_data(extracted_data: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Some documentation."""
|
||||
return extracted_data
|
||||
|
||||
assert convert_to_openai_tool(extract_data) == {
|
||||
"function": {
|
||||
"description": "Some documentation.",
|
||||
"name": "extract_data",
|
||||
"parameters": {
|
||||
"properties": {
|
||||
"due_date": {"description": "item due date", "type": "string"},
|
||||
"title": {"description": "item title", "type": "string"},
|
||||
},
|
||||
"required": [],
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
"type": "function",
|
||||
}
|
||||
|
@ -190,10 +190,44 @@ schema4 = {
|
||||
"required": ["properties"],
|
||||
}
|
||||
|
||||
schema5 = {
|
||||
"description": "A list of data.",
|
||||
"items": {
|
||||
"description": "foo",
|
||||
"properties": {
|
||||
"title": {"type": "string", "description": "item title"},
|
||||
"due_date": {"type": "string", "description": "item due date"},
|
||||
},
|
||||
"required": [],
|
||||
"type": "object",
|
||||
},
|
||||
"type": "array",
|
||||
}
|
||||
|
||||
output5 = {
|
||||
"description": "A list of data.",
|
||||
"items": {
|
||||
"description": "foo",
|
||||
"properties": {
|
||||
"title": {"type": "string", "description": "item title"},
|
||||
"due_date": {"type": "string", "description": "item due date"},
|
||||
},
|
||||
"required": [],
|
||||
"type": "object",
|
||||
},
|
||||
"type": "array",
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"schema, output",
|
||||
[(schema1, output1), (schema2, output2), (schema3, output3), (schema4, output4)],
|
||||
[
|
||||
(schema1, output1),
|
||||
(schema2, output2),
|
||||
(schema3, output3),
|
||||
(schema4, output4),
|
||||
(schema5, output5),
|
||||
],
|
||||
)
|
||||
def test_rm_titles(schema: dict, output: dict) -> None:
|
||||
assert _rm_titles(schema) == output
|
||||
|
@ -936,7 +936,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.48"
|
||||
version = "0.3.49"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -999,7 +999,7 @@ dev = [
|
||||
{ name = "jupyter", specifier = ">=1.0.0,<2.0.0" },
|
||||
{ name = "setuptools", specifier = ">=67.6.1,<68.0.0" },
|
||||
]
|
||||
lint = [{ name = "ruff", specifier = ">=0.9.2,<1.0.0" }]
|
||||
lint = [{ name = "ruff", specifier = ">=0.11.2,<0.12.0" }]
|
||||
test = [
|
||||
{ name = "blockbuster", specifier = "~=1.5.18" },
|
||||
{ name = "freezegun", specifier = ">=1.2.2,<2.0.0" },
|
||||
@ -1027,7 +1027,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.15"
|
||||
version = "0.3.17"
|
||||
source = { directory = "../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
@ -2262,27 +2262,27 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.9.4"
|
||||
version = "0.11.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/90/61/fb87430f040e4e577e784e325351186976516faef17d6fcd921fe28edfd7/ruff-0.11.2.tar.gz", hash = "sha256:ec47591497d5a1050175bdf4e1a4e6272cddff7da88a2ad595e1e326041d8d94", size = 3857511 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 },
|
||||
{ url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 },
|
||||
{ url = "https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 },
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/99/102578506f0f5fa29fd7e0df0a273864f79af044757aef73d1cae0afe6ad/ruff-0.11.2-py3-none-linux_armv6l.whl", hash = "sha256:c69e20ea49e973f3afec2c06376eb56045709f0212615c1adb0eda35e8a4e477", size = 10113146 },
|
||||
{ url = "https://files.pythonhosted.org/packages/74/ad/5cd4ba58ab602a579997a8494b96f10f316e874d7c435bcc1a92e6da1b12/ruff-0.11.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c5424cc1c4eb1d8ecabe6d4f1b70470b4f24a0c0171356290b1953ad8f0e272", size = 10867092 },
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/3e/d3f13619e1d152c7b600a38c1a035e833e794c6625c9a6cea6f63dbf3af4/ruff-0.11.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ecf20854cc73f42171eedb66f006a43d0a21bfb98a2523a809931cda569552d9", size = 10224082 },
|
||||
{ url = "https://files.pythonhosted.org/packages/90/06/f77b3d790d24a93f38e3806216f263974909888fd1e826717c3ec956bbcd/ruff-0.11.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c543bf65d5d27240321604cee0633a70c6c25c9a2f2492efa9f6d4b8e4199bb", size = 10394818 },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/7f/78aa431d3ddebfc2418cd95b786642557ba8b3cb578c075239da9ce97ff9/ruff-0.11.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20967168cc21195db5830b9224be0e964cc9c8ecf3b5a9e3ce19876e8d3a96e3", size = 9952251 },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/3e/f11186d1ddfaca438c3bbff73c6a2fdb5b60e6450cc466129c694b0ab7a2/ruff-0.11.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:955a9ce63483999d9f0b8f0b4a3ad669e53484232853054cc8b9d51ab4c5de74", size = 11563566 },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/6c/6ca91befbc0a6539ee133d9a9ce60b1a354db12c3c5d11cfdbf77140f851/ruff-0.11.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:86b3a27c38b8fce73bcd262b0de32e9a6801b76d52cdb3ae4c914515f0cef608", size = 12208721 },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/b0/24516a3b850d55b17c03fc399b681c6a549d06ce665915721dc5d6458a5c/ruff-0.11.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3b66a03b248c9fcd9d64d445bafdf1589326bee6fc5c8e92d7562e58883e30f", size = 11662274 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/65/76be06d28ecb7c6070280cef2bcb20c98fbf99ff60b1c57d2fb9b8771348/ruff-0.11.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0397c2672db015be5aa3d4dac54c69aa012429097ff219392c018e21f5085147", size = 13792284 },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/d2/4ceed7147e05852876f3b5f3fdc23f878ce2b7e0b90dd6e698bda3d20787/ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:869bcf3f9abf6457fbe39b5a37333aa4eecc52a3b99c98827ccc371a8e5b6f1b", size = 11327861 },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/78/4935ecba13706fd60ebe0e3dc50371f2bdc3d9bc80e68adc32ff93914534/ruff-0.11.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2a2b50ca35457ba785cd8c93ebbe529467594087b527a08d487cf0ee7b3087e9", size = 10276560 },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/7f/1b2435c3f5245d410bb5dc80f13ec796454c21fbda12b77d7588d5cf4e29/ruff-0.11.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7c69c74bf53ddcfbc22e6eb2f31211df7f65054bfc1f72288fc71e5f82db3eab", size = 9945091 },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/c4/692284c07e6bf2b31d82bb8c32f8840f9d0627d92983edaac991a2b66c0a/ruff-0.11.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6e8fb75e14560f7cf53b15bbc55baf5ecbe373dd5f3aab96ff7aa7777edd7630", size = 10977133 },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/cf/8ab81cb7dd7a3b0a3960c2769825038f3adcd75faf46dd6376086df8b128/ruff-0.11.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:842a472d7b4d6f5924e9297aa38149e5dcb1e628773b70e6387ae2c97a63c58f", size = 11378514 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/3a/a647fa4f316482dacf2fd68e8a386327a33d6eabd8eb2f9a0c3d291ec549/ruff-0.11.2-py3-none-win32.whl", hash = "sha256:aca01ccd0eb5eb7156b324cfaa088586f06a86d9e5314b0eb330cb48415097cc", size = 10319835 },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/54/3c12d3af58012a5e2cd7ebdbe9983f4834af3f8cbea0e8a8c74fa1e23b2b/ruff-0.11.2-py3-none-win_amd64.whl", hash = "sha256:3170150172a8f994136c0c66f494edf199a0bbea7a409f649e4bc8f4d7084080", size = 11373713 },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/d4/dd813703af8a1e2ac33bf3feb27e8a5ad514c9f219df80c64d69807e7f71/ruff-0.11.2-py3-none-win_arm64.whl", hash = "sha256:52933095158ff328f4c77af3d74f0379e34fd52f175144cefc1b192e7ccd32b4", size = 10441990 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -556,3 +556,9 @@ packages:
|
||||
repo: goat-sdk/goat
|
||||
name_title: GOAT SDK
|
||||
provider_page: goat
|
||||
- name: langchain-memgraph
|
||||
path: .
|
||||
repo: memgraph/langchain-memgraph
|
||||
- name: langchain-vectara
|
||||
path: libs/vectara
|
||||
repo: vectara/langchain-vectara
|
||||
|
@ -30,6 +30,7 @@ test = [
|
||||
"pytest-watcher<1.0.0,>=0.3.4",
|
||||
"pytest-asyncio<1.0.0,>=0.21.1",
|
||||
"defusedxml<1.0.0,>=0.7.1",
|
||||
"pytest-retry<1.8.0,>=1.7.0",
|
||||
"pytest-timeout<3.0.0,>=2.3.1",
|
||||
"pytest-socket<1.0.0,>=0.7.0",
|
||||
"langchain-core",
|
||||
|
@ -730,6 +730,7 @@ def test_thinking() -> None:
|
||||
assert block["signature"] and isinstance(block["signature"], str)
|
||||
|
||||
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
def test_redacted_thinking() -> None:
|
||||
llm = ChatAnthropic(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
|
@ -432,6 +432,7 @@ test = [
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
{ name = "pytest-mock" },
|
||||
{ name = "pytest-retry" },
|
||||
{ name = "pytest-socket" },
|
||||
{ name = "pytest-timeout" },
|
||||
{ name = "pytest-watcher" },
|
||||
@ -466,6 +467,7 @@ test = [
|
||||
{ name = "pytest", specifier = ">=7.3.0,<8.0.0" },
|
||||
{ name = "pytest-asyncio", specifier = ">=0.21.1,<1.0.0" },
|
||||
{ name = "pytest-mock", specifier = ">=3.10.0,<4.0.0" },
|
||||
{ name = "pytest-retry", specifier = ">=1.7.0,<1.8.0" },
|
||||
{ name = "pytest-socket", specifier = ">=0.7.0,<1.0.0" },
|
||||
{ name = "pytest-timeout", specifier = ">=2.3.1,<3.0.0" },
|
||||
{ name = "pytest-watcher", specifier = ">=0.3.4,<1.0.0" },
|
||||
@ -483,7 +485,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.45"
|
||||
version = "0.3.48"
|
||||
source = { editable = "../../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -541,7 +543,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.14"
|
||||
version = "0.3.16"
|
||||
source = { editable = "../../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
@ -1009,6 +1011,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-retry"
|
||||
version = "1.7.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c5/5b/607b017994cca28de3a1ad22a3eee8418e5d428dcd8ec25b26b18e995a73/pytest_retry-1.7.0.tar.gz", hash = "sha256:f8d52339f01e949df47c11ba9ee8d5b362f5824dff580d3870ec9ae0057df80f", size = 19977 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/ff/3266c8a73b9b93c4b14160a7e2b31d1e1088e28ed29f4c2d93ae34093bfd/pytest_retry-1.7.0-py3-none-any.whl", hash = "sha256:a2dac85b79a4e2375943f1429479c65beb6c69553e7dae6b8332be47a60954f4", size = 13775 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-socket"
|
||||
version = "0.7.0"
|
||||
|
@ -471,6 +471,7 @@ class ChatFireworks(BaseChatModel):
|
||||
generation_info = {}
|
||||
if finish_reason := choice.get("finish_reason"):
|
||||
generation_info["finish_reason"] = finish_reason
|
||||
generation_info["model_name"] = self.model_name
|
||||
logprobs = choice.get("logprobs")
|
||||
if logprobs:
|
||||
generation_info["logprobs"] = logprobs
|
||||
@ -565,6 +566,7 @@ class ChatFireworks(BaseChatModel):
|
||||
generation_info = {}
|
||||
if finish_reason := choice.get("finish_reason"):
|
||||
generation_info["finish_reason"] = finish_reason
|
||||
generation_info["model_name"] = self.model_name
|
||||
logprobs = choice.get("logprobs")
|
||||
if logprobs:
|
||||
generation_info["logprobs"] = logprobs
|
||||
|
@ -7,14 +7,14 @@ authors = []
|
||||
license = { text = "MIT" }
|
||||
requires-python = "<4.0,>=3.9"
|
||||
dependencies = [
|
||||
"langchain-core<1.0.0,>=0.3.47",
|
||||
"langchain-core<1.0.0,>=0.3.49",
|
||||
"fireworks-ai>=0.13.0",
|
||||
"openai<2.0.0,>=1.10.0",
|
||||
"requests<3,>=2",
|
||||
"aiohttp<4.0.0,>=3.9.1",
|
||||
]
|
||||
name = "langchain-fireworks"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
description = "An integration package connecting Fireworks and LangChain"
|
||||
readme = "README.md"
|
||||
|
||||
|
@ -98,16 +98,19 @@ async def test_astream() -> None:
|
||||
|
||||
full: Optional[BaseMessageChunk] = None
|
||||
chunks_with_token_counts = 0
|
||||
chunks_with_response_metadata = 0
|
||||
async for token in llm.astream("I'm Pickle Rick"):
|
||||
assert isinstance(token, AIMessageChunk)
|
||||
assert isinstance(token.content, str)
|
||||
full = token if full is None else full + token
|
||||
if token.usage_metadata is not None:
|
||||
chunks_with_token_counts += 1
|
||||
if chunks_with_token_counts != 1:
|
||||
if token.response_metadata:
|
||||
chunks_with_response_metadata += 1
|
||||
if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1:
|
||||
raise AssertionError(
|
||||
"Expected exactly one chunk with token counts. "
|
||||
"AIMessageChunk aggregation adds counts. Check that "
|
||||
"Expected exactly one chunk with token counts or response_metadata. "
|
||||
"AIMessageChunk aggregation adds / appends counts and metadata. Check that "
|
||||
"this is behaving properly."
|
||||
)
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
@ -118,6 +121,8 @@ async def test_astream() -> None:
|
||||
full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"]
|
||||
== full.usage_metadata["total_tokens"]
|
||||
)
|
||||
assert isinstance(full.response_metadata["model_name"], str)
|
||||
assert full.response_metadata["model_name"]
|
||||
|
||||
|
||||
async def test_abatch() -> None:
|
||||
|
@ -635,7 +635,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.47"
|
||||
version = "0.3.49"
|
||||
source = { editable = "../../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -693,7 +693,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-fireworks"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
@ -763,7 +763,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.15"
|
||||
version = "0.3.17"
|
||||
source = { editable = "../../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
|
@ -236,13 +236,15 @@ async def acompletion_with_retry(
|
||||
def _convert_chunk_to_message_chunk(
|
||||
chunk: Dict, default_class: Type[BaseMessageChunk]
|
||||
) -> BaseMessageChunk:
|
||||
_delta = chunk["choices"][0]["delta"]
|
||||
_choice = chunk["choices"][0]
|
||||
_delta = _choice["delta"]
|
||||
role = _delta.get("role")
|
||||
content = _delta.get("content") or ""
|
||||
if role == "user" or default_class == HumanMessageChunk:
|
||||
return HumanMessageChunk(content=content)
|
||||
elif role == "assistant" or default_class == AIMessageChunk:
|
||||
additional_kwargs: Dict = {}
|
||||
response_metadata = {}
|
||||
if raw_tool_calls := _delta.get("tool_calls"):
|
||||
additional_kwargs["tool_calls"] = raw_tool_calls
|
||||
try:
|
||||
@ -272,11 +274,16 @@ def _convert_chunk_to_message_chunk(
|
||||
}
|
||||
else:
|
||||
usage_metadata = None
|
||||
if _choice.get("finish_reason") is not None and isinstance(
|
||||
chunk.get("model"), str
|
||||
):
|
||||
response_metadata["model_name"] = chunk.get("model")
|
||||
return AIMessageChunk(
|
||||
content=content,
|
||||
additional_kwargs=additional_kwargs,
|
||||
tool_call_chunks=tool_call_chunks, # type: ignore[arg-type]
|
||||
usage_metadata=usage_metadata, # type: ignore[arg-type]
|
||||
response_metadata=response_metadata,
|
||||
)
|
||||
elif role == "system" or default_class == SystemMessageChunk:
|
||||
return SystemMessageChunk(content=content)
|
||||
@ -457,9 +464,9 @@ class ChatMistralAI(BaseChatModel):
|
||||
self, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any
|
||||
) -> Any:
|
||||
"""Use tenacity to retry the completion call."""
|
||||
# retry_decorator = _create_retry_decorator(self, run_manager=run_manager)
|
||||
retry_decorator = _create_retry_decorator(self, run_manager=run_manager)
|
||||
|
||||
# @retry_decorator
|
||||
@retry_decorator
|
||||
def _completion_with_retry(**kwargs: Any) -> Any:
|
||||
if "stream" not in kwargs:
|
||||
kwargs["stream"] = False
|
||||
|
@ -7,14 +7,14 @@ authors = []
|
||||
license = { text = "MIT" }
|
||||
requires-python = "<4.0,>=3.9"
|
||||
dependencies = [
|
||||
"langchain-core<1.0.0,>=0.3.47",
|
||||
"langchain-core<1.0.0,>=0.3.49",
|
||||
"tokenizers<1,>=0.15.1",
|
||||
"httpx<1,>=0.25.2",
|
||||
"httpx-sse<1,>=0.3.1",
|
||||
"pydantic<3,>=2",
|
||||
]
|
||||
name = "langchain-mistralai"
|
||||
version = "0.2.9"
|
||||
version = "0.2.10"
|
||||
description = "An integration package connecting Mistral and LangChain"
|
||||
readme = "README.md"
|
||||
|
||||
|
@ -1,9 +1,12 @@
|
||||
"""Test ChatMistral chat model."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
import pytest
|
||||
from httpx import ReadTimeout
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
@ -20,7 +23,7 @@ def test_stream() -> None:
|
||||
"""Test streaming tokens from ChatMistralAI."""
|
||||
llm = ChatMistralAI()
|
||||
|
||||
for token in llm.stream("I'm Pickle Rick"):
|
||||
for token in llm.stream("Hello"):
|
||||
assert isinstance(token.content, str)
|
||||
|
||||
|
||||
@ -30,16 +33,19 @@ async def test_astream() -> None:
|
||||
|
||||
full: Optional[BaseMessageChunk] = None
|
||||
chunks_with_token_counts = 0
|
||||
async for token in llm.astream("I'm Pickle Rick"):
|
||||
chunks_with_response_metadata = 0
|
||||
async for token in llm.astream("Hello"):
|
||||
assert isinstance(token, AIMessageChunk)
|
||||
assert isinstance(token.content, str)
|
||||
full = token if full is None else full + token
|
||||
if token.usage_metadata is not None:
|
||||
chunks_with_token_counts += 1
|
||||
if chunks_with_token_counts != 1:
|
||||
if token.response_metadata:
|
||||
chunks_with_response_metadata += 1
|
||||
if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1:
|
||||
raise AssertionError(
|
||||
"Expected exactly one chunk with token counts. "
|
||||
"AIMessageChunk aggregation adds counts. Check that "
|
||||
"Expected exactly one chunk with token counts or response_metadata. "
|
||||
"AIMessageChunk aggregation adds / appends counts and metadata. Check that "
|
||||
"this is behaving properly."
|
||||
)
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
@ -50,6 +56,8 @@ async def test_astream() -> None:
|
||||
full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"]
|
||||
== full.usage_metadata["total_tokens"]
|
||||
)
|
||||
assert isinstance(full.response_metadata["model_name"], str)
|
||||
assert full.response_metadata["model_name"]
|
||||
|
||||
|
||||
async def test_abatch() -> None:
|
||||
@ -296,3 +304,39 @@ def test_streaming_tool_call() -> None:
|
||||
acc = chunk if acc is None else acc + chunk
|
||||
assert acc.content != ""
|
||||
assert "tool_calls" not in acc.additional_kwargs
|
||||
|
||||
|
||||
def test_retry_parameters(caplog: pytest.LogCaptureFixture) -> None:
|
||||
"""Test that retry parameters are honored in ChatMistralAI."""
|
||||
# Create a model with intentionally short timeout and multiple retries
|
||||
mistral = ChatMistralAI(
|
||||
timeout=1, # Very short timeout to trigger timeouts
|
||||
max_retries=3, # Should retry 3 times
|
||||
)
|
||||
|
||||
# Simple test input that should take longer than 1 second to process
|
||||
test_input = "Write a 2 sentence story about a cat"
|
||||
|
||||
# Measure start time
|
||||
t0 = time.time()
|
||||
|
||||
try:
|
||||
# Try to get a response
|
||||
response = mistral.invoke(test_input)
|
||||
|
||||
# If successful, validate the response
|
||||
elapsed_time = time.time() - t0
|
||||
logging.info(f"Request succeeded in {elapsed_time:.2f} seconds")
|
||||
# Check that we got a valid response
|
||||
assert response.content
|
||||
assert isinstance(response.content, str)
|
||||
assert "cat" in response.content.lower()
|
||||
|
||||
except ReadTimeout:
|
||||
elapsed_time = time.time() - t0
|
||||
logging.info(f"Request timed out after {elapsed_time:.2f} seconds")
|
||||
assert elapsed_time >= 3.0
|
||||
pytest.skip("Test timed out as expected with short timeout")
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected exception: {e}")
|
||||
raise
|
||||
|
@ -2,8 +2,9 @@
|
||||
|
||||
import os
|
||||
from typing import Any, AsyncGenerator, Dict, Generator, List, cast
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from langchain_core.callbacks.base import BaseCallbackHandler
|
||||
from langchain_core.messages import (
|
||||
@ -270,3 +271,46 @@ def test_extra_kwargs() -> None:
|
||||
# Test that if provided twice it errors
|
||||
with pytest.raises(ValueError):
|
||||
ChatMistralAI(model="my-model", foo=3, model_kwargs={"foo": 2}) # type: ignore[call-arg]
|
||||
|
||||
|
||||
def test_retry_with_failure_then_success() -> None:
|
||||
"""Test that retry mechanism works correctly when
|
||||
first request fails and second succeeds."""
|
||||
# Create a real ChatMistralAI instance
|
||||
chat = ChatMistralAI(max_retries=3)
|
||||
|
||||
# Set up the actual retry mechanism (not just mocking it)
|
||||
# We'll track how many times the function is called
|
||||
call_count = 0
|
||||
|
||||
def mock_post(*args: Any, **kwargs: Any) -> MagicMock:
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
|
||||
if call_count == 1:
|
||||
raise httpx.RequestError("Connection error", request=MagicMock())
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Hello!",
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 1,
|
||||
"completion_tokens": 1,
|
||||
"total_tokens": 2,
|
||||
},
|
||||
}
|
||||
return mock_response
|
||||
|
||||
with patch.object(chat.client, "post", side_effect=mock_post):
|
||||
result = chat.invoke("Hello")
|
||||
assert result.content == "Hello!"
|
||||
assert call_count == 2, f"Expected 2 calls, but got {call_count}"
|
||||
|
@ -332,7 +332,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.47"
|
||||
version = "0.3.49"
|
||||
source = { editable = "../../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -390,7 +390,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-mistralai"
|
||||
version = "0.2.9"
|
||||
version = "0.2.10"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
@ -450,7 +450,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.15"
|
||||
version = "0.3.17"
|
||||
source = { editable = "../../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
|
@ -456,6 +456,12 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
)
|
||||
"""Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or
|
||||
None."""
|
||||
stream_usage: bool = False
|
||||
"""Whether to include usage metadata in streaming output. If True, an additional
|
||||
message chunk will be generated during the stream including usage metadata.
|
||||
|
||||
.. versionadded:: 0.3.9
|
||||
"""
|
||||
max_retries: Optional[int] = None
|
||||
"""Maximum number of retries to make when generating."""
|
||||
presence_penalty: Optional[float] = None
|
||||
@ -811,14 +817,38 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
is_first_chunk = False
|
||||
yield generation_chunk
|
||||
|
||||
def _should_stream_usage(
|
||||
self, stream_usage: Optional[bool] = None, **kwargs: Any
|
||||
) -> bool:
|
||||
"""Determine whether to include usage metadata in streaming output.
|
||||
|
||||
For backwards compatibility, we check for `stream_options` passed
|
||||
explicitly to kwargs or in the model_kwargs and override self.stream_usage.
|
||||
"""
|
||||
stream_usage_sources = [ # order of precedence
|
||||
stream_usage,
|
||||
kwargs.get("stream_options", {}).get("include_usage"),
|
||||
self.model_kwargs.get("stream_options", {}).get("include_usage"),
|
||||
self.stream_usage,
|
||||
]
|
||||
for source in stream_usage_sources:
|
||||
if isinstance(source, bool):
|
||||
return source
|
||||
return self.stream_usage
|
||||
|
||||
def _stream(
|
||||
self,
|
||||
messages: List[BaseMessage],
|
||||
stop: Optional[List[str]] = None,
|
||||
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
||||
*,
|
||||
stream_usage: Optional[bool] = None,
|
||||
**kwargs: Any,
|
||||
) -> Iterator[ChatGenerationChunk]:
|
||||
kwargs["stream"] = True
|
||||
stream_usage = self._should_stream_usage(stream_usage, **kwargs)
|
||||
if stream_usage:
|
||||
kwargs["stream_options"] = {"include_usage": stream_usage}
|
||||
payload = self._get_request_payload(messages, stop=stop, **kwargs)
|
||||
default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
|
||||
base_generation_info = {}
|
||||
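Since the hunk above moves `_should_stream_usage` (and the `stream_usage` field) from `ChatOpenAI` up to `BaseChatOpenAI`, the same resolution order now applies to every subclass, including `AzureChatOpenAI`. The standalone sketch below just restates that precedence outside the class so it can be read in isolation; the function name is illustrative, not library API:

```python
from typing import Any, Optional


def resolve_stream_usage(
    stream_usage_arg: Optional[bool],
    call_kwargs: dict[str, Any],
    model_kwargs: dict[str, Any],
    default: bool,
) -> bool:
    # First explicit boolean wins: per-call argument, then stream_options passed
    # at call time, then stream_options baked into model_kwargs, then the field.
    candidates = [
        stream_usage_arg,
        call_kwargs.get("stream_options", {}).get("include_usage"),
        model_kwargs.get("stream_options", {}).get("include_usage"),
        default,
    ]
    for candidate in candidates:
        if isinstance(candidate, bool):
            return candidate
    return default


# A per-call stream_options override beats a False constructor default:
assert resolve_stream_usage(None, {"stream_options": {"include_usage": True}}, {}, False)
```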
@ -1005,9 +1035,14 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
messages: List[BaseMessage],
|
||||
stop: Optional[List[str]] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
||||
*,
|
||||
stream_usage: Optional[bool] = None,
|
||||
**kwargs: Any,
|
||||
) -> AsyncIterator[ChatGenerationChunk]:
|
||||
kwargs["stream"] = True
|
||||
stream_usage = self._should_stream_usage(stream_usage, **kwargs)
|
||||
if stream_usage:
|
||||
kwargs["stream_options"] = {"include_usage": stream_usage}
|
||||
payload = self._get_request_payload(messages, stop=stop, **kwargs)
|
||||
default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
|
||||
base_generation_info = {}
|
||||
@ -2202,11 +2237,6 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override]
|
||||
|
||||
""" # noqa: E501
|
||||
|
||||
stream_usage: bool = False
|
||||
"""Whether to include usage metadata in streaming output. If True, additional
|
||||
message chunks will be generated during the stream including usage metadata.
|
||||
"""
|
||||
|
||||
max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
|
||||
"""Maximum number of tokens to generate."""
|
||||
|
||||
@ -2268,55 +2298,21 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override]
|
||||
message["role"] = "developer"
|
||||
return payload
|
||||
|
||||
def _should_stream_usage(
|
||||
self, stream_usage: Optional[bool] = None, **kwargs: Any
|
||||
) -> bool:
|
||||
"""Determine whether to include usage metadata in streaming output.
|
||||
|
||||
For backwards compatibility, we check for `stream_options` passed
|
||||
explicitly to kwargs or in the model_kwargs and override self.stream_usage.
|
||||
"""
|
||||
stream_usage_sources = [ # order of preference
|
||||
stream_usage,
|
||||
kwargs.get("stream_options", {}).get("include_usage"),
|
||||
self.model_kwargs.get("stream_options", {}).get("include_usage"),
|
||||
self.stream_usage,
|
||||
]
|
||||
for source in stream_usage_sources:
|
||||
if isinstance(source, bool):
|
||||
return source
|
||||
return self.stream_usage
|
||||
|
||||
def _stream(
|
||||
self, *args: Any, stream_usage: Optional[bool] = None, **kwargs: Any
|
||||
) -> Iterator[ChatGenerationChunk]:
|
||||
"""Set default stream_options."""
|
||||
def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGenerationChunk]:
|
||||
"""Route to Chat Completions or Responses API."""
|
||||
if self._use_responses_api({**kwargs, **self.model_kwargs}):
|
||||
return super()._stream_responses(*args, **kwargs)
|
||||
else:
|
||||
stream_usage = self._should_stream_usage(stream_usage, **kwargs)
|
||||
# Note: stream_options is not a valid parameter for Azure OpenAI.
|
||||
# To support users proxying Azure through ChatOpenAI, here we only specify
|
||||
# stream_options if include_usage is set to True.
|
||||
# See https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new
|
||||
# for release notes.
|
||||
if stream_usage:
|
||||
kwargs["stream_options"] = {"include_usage": stream_usage}
|
||||
|
||||
return super()._stream(*args, **kwargs)
|
||||
|
||||
async def _astream(
|
||||
self, *args: Any, stream_usage: Optional[bool] = None, **kwargs: Any
|
||||
self, *args: Any, **kwargs: Any
|
||||
) -> AsyncIterator[ChatGenerationChunk]:
|
||||
"""Set default stream_options."""
|
||||
"""Route to Chat Completions or Responses API."""
|
||||
if self._use_responses_api({**kwargs, **self.model_kwargs}):
|
||||
async for chunk in super()._astream_responses(*args, **kwargs):
|
||||
yield chunk
|
||||
else:
|
||||
stream_usage = self._should_stream_usage(stream_usage, **kwargs)
|
||||
if stream_usage:
|
||||
kwargs["stream_options"] = {"include_usage": stream_usage}
|
||||
|
||||
async for chunk in super()._astream(*args, **kwargs):
|
||||
yield chunk

@ -7,12 +7,12 @@ authors = []
license = { text = "MIT" }
requires-python = "<4.0,>=3.9"
dependencies = [
    "langchain-core<1.0.0,>=0.3.48",
    "langchain-core<1.0.0,>=0.3.49",
    "openai<2.0.0,>=1.68.2",
    "tiktoken<1,>=0.7",
]
name = "langchain-openai"
version = "0.3.10"
version = "0.3.11"
description = "An integration package connecting OpenAI and LangChain"
readme = "README.md"

@ -30,6 +30,7 @@ test = [
    "pytest-watcher<1.0.0,>=0.3.4",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "pytest-cov<5.0.0,>=4.1.0",
    "pytest-retry<1.8.0,>=1.7.0",
    "pytest-socket<1.0.0,>=0.6.0",
    "pytest-xdist<4.0.0,>=3.6.1",
    "numpy<2,>=1; python_version < \"3.12\"",

@ -3,7 +3,6 @@
import os
from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

@ -25,6 +24,7 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
            "model": "gpt-4o-mini",
            "openai_api_version": OPENAI_API_VERSION,
            "azure_endpoint": OPENAI_API_BASE,
            "stream_usage": True,
        }

    @property
@ -35,10 +35,6 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
    def supports_json_mode(self) -> bool:
        return True

    @pytest.mark.xfail(reason="Not yet supported.")
    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
        super().test_usage_metadata_streaming(model)


class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests):
    """Test a legacy model."""
@ -53,12 +49,9 @@ class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests):
            "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"],
            "openai_api_version": OPENAI_API_VERSION,
            "azure_endpoint": OPENAI_API_BASE,
            "stream_usage": True,
        }

    @property
    def structured_output_kwargs(self) -> dict:
        return {"method": "function_calling"}

    @pytest.mark.xfail(reason="Not yet supported.")
    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
        super().test_usage_metadata_streaming(model)
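Note: both standard test classes now pass `stream_usage=True` and drop the `xfail` on `test_usage_metadata_streaming`, i.e. streamed token counts are expected to work for Azure deployments. A user-facing sketch of that (deployment name and API version are placeholders; endpoint and key are read from `AZURE_OPENAI_ENDPOINT` / `AZURE_OPENAI_API_KEY`):

```python
from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_deployment="my-gpt-4o-mini-deployment",  # placeholder deployment name
    api_version="2024-10-21",                      # placeholder API version
    stream_usage=True,
)

full = None
for chunk in llm.stream("Hello"):
    full = chunk if full is None else full + chunk

print(full.usage_metadata)
```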

@ -53,6 +53,7 @@ def _check_response(response: Optional[BaseMessage]) -> None:
        assert tool_output["type"]


@pytest.mark.flaky(retries=3, delay=1)
def test_web_search() -> None:
    llm = ChatOpenAI(model=MODEL_NAME)
    first_response = llm.invoke(
@ -108,6 +109,7 @@ def test_web_search() -> None:
    _check_response(response)


@pytest.mark.flaky(retries=3, delay=1)
async def test_web_search_async() -> None:
    llm = ChatOpenAI(model=MODEL_NAME)
    response = await llm.ainvoke(
@ -129,6 +131,7 @@ async def test_web_search_async() -> None:
    _check_response(full)


@pytest.mark.flaky(retries=3, delay=1)
def test_function_calling() -> None:
    def multiply(x: int, y: int) -> int:
        """return x * y"""
@ -197,6 +200,7 @@ async def test_parsed_pydantic_schema_async() -> None:
    assert parsed.response


@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
def test_parsed_dict_schema(schema: Any) -> None:
    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
@ -241,6 +245,7 @@ def test_parsed_strict() -> None:
    )


@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
async def test_parsed_dict_schema_async(schema: Any) -> None:
    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
@ -313,6 +318,7 @@ def test_route_from_model_kwargs() -> None:
    _ = next(llm.stream("Hello"))


@pytest.mark.flaky(retries=3, delay=1)
def test_computer_calls() -> None:
    llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"})
    tool = {
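Note: the hunks above only add `@pytest.mark.flaky` retries to the Responses API integration tests. For orientation, a rough sketch of the kind of call these tests exercise (the built-in `web_search_preview` tool routes the request through the Responses API; an `OPENAI_API_KEY` is assumed and the model name is illustrative):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

response = llm.invoke(
    "What was a positive news story from today?",
    tools=[{"type": "web_search_preview"}],
)
print(response.content)
```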