diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index 0ff93095f20..96c99b5087a 100644
--- a/libs/core/langchain_core/messages/base.py
+++ b/libs/core/langchain_core/messages/base.py
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
"""Base abstract message class.
Messages are the inputs and outputs of a chat model.
+
+ Examples include [`HumanMessage`][langchain.messages.HumanMessage],
+ [`AIMessage`][langchain.messages.AIMessage], and
+ [`SystemMessage`][langchain.messages.SystemMessage].
"""
content: str | list[str | dict]
diff --git a/libs/core/langchain_core/output_parsers/__init__.py b/libs/core/langchain_core/output_parsers/__init__.py
index 81c40b73a43..7bd9c0ca893 100644
--- a/libs/core/langchain_core/output_parsers/__init__.py
+++ b/libs/core/langchain_core/output_parsers/__init__.py
@@ -1,4 +1,20 @@
-"""**OutputParser** classes parse the output of an LLM call."""
+"""`OutputParser` classes parse the output of an LLM call into structured data.
+
+!!! tip "Structured output"
+
+ Output parsers emerged as an early solution to the challenge of obtaining structured
+ output from LLMs.
+
+ Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
+ natively. In such cases, using output parsers may be unnecessary, and you should
+ leverage the model's built-in capabilities for structured output. Refer to the
+ [documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
+ for guidance on how to achieve structured output directly.
+
+ Output parsers remain valuable when working with models that do not support
+ structured output natively, or when you require additional processing or validation
+ of the model's output beyond its inherent capabilities.
+"""
from typing import TYPE_CHECKING
diff --git a/libs/core/langchain_core/output_parsers/base.py b/libs/core/langchain_core/output_parsers/base.py
index 1ef27eb11aa..53f5240a96c 100644
--- a/libs/core/langchain_core/output_parsers/base.py
+++ b/libs/core/langchain_core/output_parsers/base.py
@@ -135,6 +135,9 @@ class BaseOutputParser(
Example:
```python
+ # Implement a simple boolean output parser
+
+
class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"
diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py
index 9daacf1ad53..fc2b43ee01c 100644
--- a/libs/core/langchain_core/output_parsers/json.py
+++ b/libs/core/langchain_core/output_parsers/json.py
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.
+ Probably the most reliable output parser for getting structured data that does *not*
+ use function calling.
+
When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.
- In streaming, if `diff` is set to `True`, yields JSONPatch operations
- describing the difference between the previous and the current object.
+ In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
+ difference between the previous and the current object.
"""
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
diff --git a/libs/core/langchain_core/output_parsers/list.py b/libs/core/langchain_core/output_parsers/list.py
index 22c77058a3e..16b99b64f6a 100644
--- a/libs/core/langchain_core/output_parsers/list.py
+++ b/libs/core/langchain_core/output_parsers/list.py
@@ -41,7 +41,7 @@ def droplastn(
class ListOutputParser(BaseTransformOutputParser[list[str]]):
- """Parse the output of an LLM call to a list."""
+ """Parse the output of a model to a list."""
@property
def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
for chunk in input:
if isinstance(chunk, BaseMessage):
- # extract text
+ # Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
- # add current chunk to buffer
+ # Add current chunk to buffer
buffer += chunk
- # parse buffer into a list of parts
+ # Parse buffer into a list of parts
try:
done_idx = 0
- # yield only complete parts
+ # Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
- # yield only complete parts
+ # Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
- # yield the last part
+ # Yield the last part
for part in self.parse(buffer):
yield [part]
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
async for chunk in input:
if isinstance(chunk, BaseMessage):
- # extract text
+ # Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
- # add current chunk to buffer
+ # Add current chunk to buffer
buffer += chunk
- # parse buffer into a list of parts
+ # Parse buffer into a list of parts
try:
done_idx = 0
- # yield only complete parts
+ # Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
- # yield only complete parts
+ # Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
- # yield the last part
+ # Yield the last part
for part in self.parse(buffer):
yield [part]
class CommaSeparatedListOutputParser(ListOutputParser):
- """Parse the output of an LLM call to a comma-separated list."""
+ """Parse the output of a model to a comma-separated list."""
@classmethod
def is_lc_serializable(cls) -> bool:
- """Return True as this class is serializable."""
+ """Return `True` as this class is serializable."""
return True
@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
- # keep old logic for backup
+ # Keep old logic for backup
return [part.strip() for part in text.split(",")]
@property
diff --git a/libs/core/langchain_core/output_parsers/openai_tools.py b/libs/core/langchain_core/output_parsers/openai_tools.py
index d1d254170b4..23884abdfd3 100644
--- a/libs/core/langchain_core/output_parsers/openai_tools.py
+++ b/libs/core/langchain_core/output_parsers/openai_tools.py
@@ -224,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
- all the keys that have been returned so far.
+ all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Raises:
@@ -307,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
- all the keys that have been returned so far.
+ all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Returns:
diff --git a/libs/core/langchain_core/output_parsers/string.py b/libs/core/langchain_core/output_parsers/string.py
index 456fcdc83b8..4b189e1c467 100644
--- a/libs/core/langchain_core/output_parsers/string.py
+++ b/libs/core/langchain_core/output_parsers/string.py
@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
class StrOutputParser(BaseTransformOutputParser[str]):
- """OutputParser that parses LLMResult into the top likely string."""
+ """OutputParser that parses `LLMResult` into the top likely string."""
@classmethod
def is_lc_serializable(cls) -> bool:
- """StrOutputParser is serializable.
+ """`StrOutputParser` is serializable.
Returns:
- True
+ `True`
"""
return True
diff --git a/libs/core/langchain_core/output_parsers/xml.py b/libs/core/langchain_core/output_parsers/xml.py
index 718145ebb58..55e93542d7f 100644
--- a/libs/core/langchain_core/output_parsers/xml.py
+++ b/libs/core/langchain_core/output_parsers/xml.py
@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML.
This implementation is pulled into a class to avoid implementation
- drift between transform and atransform of the XMLOutputParser.
+ drift between transform and atransform of the `XMLOutputParser`.
"""
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser.
Args:
- parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
- See documentation in XMLOutputParser for more information.
+ parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
+ `'xml'`. See documentation in `XMLOutputParser` for more information.
Raises:
- ImportError: If defusedxml is not installed and the defusedxml
- parser is requested.
+ ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+ requested.
"""
if parser == "defusedxml":
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text.
Args:
- chunk: A chunk of text to parse. This can be a string or a BaseMessage.
+ chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
Yields:
- A dictionary representing the parsed XML element.
+ A `dict` representing the parsed XML element.
Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:
class XMLOutputParser(BaseTransformOutputParser):
- """Parse an output using xml format."""
+ """Parse an output using XML format.
+
+ Returns a dictionary of tags.
+ """
tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.
Note this may not be perfect depending on the LLM implementation.
- For example, with tags=["foo", "bar", "baz"]:
+ For example, with `tags=["foo", "bar", "baz"]`:
1. A well-formatted XML instance:
- "\n \n \n \n"
+ `"\n \n \n \n"`
2. A badly-formatted XML instance (missing closing tag for 'bar'):
- "\n \n "
+ `"\n \n "`
3. A badly-formatted XML instance (unexpected 'tag' element):
- "\n \n \n"
+ `"\n \n \n"`
"""
encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
)
parser: Literal["defusedxml", "xml"] = "defusedxml"
- """Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
+ """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
- * 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
- present in some distributions of Python's standard library xml.
- `defusedxml` is a wrapper around the standard library parser that
- sets up the parser with secure defaults.
- * 'xml' is the standard library parser.
+ * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
+ present in some distributions of Python's standard library xml.
+ `defusedxml` is a wrapper around the standard library parser that
+ sets up the parser with secure defaults.
+ * `'xml'` is the standard library parser.
- Use `xml` only if you are sure that your distribution of the standard library
- is not vulnerable to XML vulnerabilities.
+ Use `xml` only if you are sure that your distribution of the standard library is not
+ affected by XML vulnerabilities.
Please review the following resources for more information:
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml
- The standard library relies on libexpat for parsing XML:
- https://github.com/libexpat/libexpat
+ The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
+ for parsing XML.
"""
def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call.
Returns:
- A dictionary representing the parsed XML.
+ A `dict` representing the parsed XML.
Raises:
OutputParserException: If the XML is not well-formed.
- ImportError: If defusedxml is not installed and the defusedxml
- parser is requested.
+ ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+ requested.
"""
# Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI
diff --git a/libs/core/langchain_core/prompts/chat.py b/libs/core/langchain_core/prompts/chat.py
index 99b1b7451c1..24cd084817e 100644
--- a/libs/core/langchain_core/prompts/chat.py
+++ b/libs/core/langchain_core/prompts/chat.py
@@ -776,42 +776,41 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
Use to create flexible templated prompts for chat models.
- Examples:
- !!! warning "Behavior changed in 0.2.24"
- You can pass any Message-like formats supported by
- `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
- init.
+ !!! warning "Behavior changed in 0.2.24"
+ You can pass any Message-like formats supported by
+ `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
+ init.
- ```python
- from langchain_core.prompts import ChatPromptTemplate
+ ```python
+ from langchain_core.prompts import ChatPromptTemplate
- template = ChatPromptTemplate(
- [
- ("system", "You are a helpful AI bot. Your name is {name}."),
- ("human", "Hello, how are you doing?"),
- ("ai", "I'm doing well, thanks!"),
- ("human", "{user_input}"),
- ]
- )
+ template = ChatPromptTemplate(
+ [
+ ("system", "You are a helpful AI bot. Your name is {name}."),
+ ("human", "Hello, how are you doing?"),
+ ("ai", "I'm doing well, thanks!"),
+ ("human", "{user_input}"),
+ ]
+ )
- prompt_value = template.invoke(
- {
- "name": "Bob",
- "user_input": "What is your name?",
- }
- )
- # Output:
- # ChatPromptValue(
- # messages=[
- # SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
- # HumanMessage(content='Hello, how are you doing?'),
- # AIMessage(content="I'm doing well, thanks!"),
- # HumanMessage(content='What is your name?')
- # ]
- # )
- ```
+ prompt_value = template.invoke(
+ {
+ "name": "Bob",
+ "user_input": "What is your name?",
+ }
+ )
+ # Output:
+ # ChatPromptValue(
+ # messages=[
+ # SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
+ # HumanMessage(content='Hello, how are you doing?'),
+ # AIMessage(content="I'm doing well, thanks!"),
+ # HumanMessage(content='What is your name?')
+ # ]
+ # )
+ ```
- Messages Placeholder:
+ !!! note "Messages Placeholder"
```python
# In addition to Human/AI/Tool/Function messages,
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# )
```
- Single-variable template:
+ !!! note "Single-variable template"
If your prompt has only a single input variable (i.e., 1 instance of "{variable_nams}"),
and you invoke the template with a non-dict object, the prompt template will
inject the provided argument into that variable location.
-
```python
from langchain_core.prompts import ChatPromptTemplate
diff --git a/libs/core/langchain_core/tracers/log_stream.py b/libs/core/langchain_core/tracers/log_stream.py
index b42e35f548d..345d5176735 100644
--- a/libs/core/langchain_core/tracers/log_stream.py
+++ b/libs/core/langchain_core/tracers/log_stream.py
@@ -96,10 +96,10 @@ class RunLogPatch:
"""Patch to the run log."""
ops: list[dict[str, Any]]
- """List of jsonpatch operations, which describe how to create the run state
+ """List of JSONPatch operations, which describe how to create the run state
from an empty dict. This is the minimal representation of the log, designed to
be serialized as JSON and sent over the wire to reconstruct the log on the other
- side. Reconstruction of the state can be done with any jsonpatch-compliant library,
+ side. Reconstruction of the state can be done with any JSONPatch-compliant library,
see https://jsonpatch.com for more information."""
def __init__(self, *ops: dict[str, Any]) -> None:
diff --git a/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py b/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
index d16543bd560..e09e993933b 100644
--- a/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
+++ b/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
permissions of the tools that underlie the given agent toolkit, and determine
whether permissions of the given toolkit are appropriate for the application.
-See [Security](https://docs.langchain.com/oss/python/security-policy) for more
-information.
+See https://docs.langchain.com/oss/python/security-policy for more information.
"""
from pathlib import Path
diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py
index f53640b02ed..bd3dbf3416f 100644
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
and HuggingFace tokenizer based on the tiktoken_enabled flag.
Args:
- texts (List[str]): A list of texts to embed.
- engine (str): The engine or model to use for embeddings.
- chunk_size (int | None): The size of chunks for processing embeddings.
+ texts: A list of texts to embed.
+ engine: The engine or model to use for embeddings.
+ chunk_size: The size of chunks for processing embeddings.
Returns:
- List[List[float]]: A list of embeddings for each input text.
+ A list of embeddings for each input text.
"""
_chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs}
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
Args:
- texts (List[str]): A list of texts to embed.
- engine (str): The engine or model to use for embeddings.
- chunk_size (int | None): The size of chunks for processing embeddings.
+ texts: A list of texts to embed.
+ engine: The engine or model to use for embeddings.
+ chunk_size: The size of chunks for processing embeddings.
Returns:
- List[List[float]]: A list of embeddings for each input text.
+ A list of embeddings for each input text.
"""
_chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs}