mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 10:17:00 +00:00
style(core): more cleanup all around (#33711)
This commit is contained in:
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
|
||||
"""Base abstract message class.
|
||||
|
||||
Messages are the inputs and outputs of a chat model.
|
||||
|
||||
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
|
||||
[`AIMessage`][langchain.messages.AIMessage], and
|
||||
[`SystemMessage`][langchain.messages.SystemMessage].
|
||||
"""
|
||||
|
||||
content: str | list[str | dict]
|
||||
|
||||
@@ -1,4 +1,20 @@
|
||||
"""**OutputParser** classes parse the output of an LLM call."""
|
||||
"""`OutputParser` classes parse the output of an LLM call into structured data.
|
||||
|
||||
!!! tip "Structured output"
|
||||
|
||||
Output parsers emerged as an early solution to the challenge of obtaining structured
|
||||
output from LLMs.
|
||||
|
||||
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
||||
natively. In such cases, using output parsers may be unnecessary, and you should
|
||||
leverage the model's built-in capabilities for structured output. Refer to the
|
||||
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
|
||||
for guidance on how to achieve structured output directly.
|
||||
|
||||
Output parsers remain valuable when working with models that do not support
|
||||
structured output natively, or when you require additional processing or validation
|
||||
of the model's output beyond its inherent capabilities.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -135,6 +135,9 @@ class BaseOutputParser(
|
||||
|
||||
Example:
|
||||
```python
|
||||
# Implement a simple boolean output parser
|
||||
|
||||
|
||||
class BooleanOutputParser(BaseOutputParser[bool]):
|
||||
true_val: str = "YES"
|
||||
false_val: str = "NO"
|
||||
|
||||
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
|
||||
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
||||
"""Parse the output of an LLM call to a JSON object.
|
||||
|
||||
Probably the most reliable output parser for getting structured data that does *not*
|
||||
use function calling.
|
||||
|
||||
When used in streaming mode, it will yield partial JSON objects containing
|
||||
all the keys that have been returned so far.
|
||||
|
||||
In streaming, if `diff` is set to `True`, yields JSONPatch operations
|
||||
describing the difference between the previous and the current object.
|
||||
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
|
||||
difference between the previous and the current object.
|
||||
"""
|
||||
|
||||
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
||||
|
||||
@@ -41,7 +41,7 @@ def droplastn(
|
||||
|
||||
|
||||
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
"""Parse the output of an LLM call to a list."""
|
||||
"""Parse the output of a model to a list."""
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# extract text
|
||||
# Extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
# Add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
# Parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# yield the last part
|
||||
# Yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
async for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# extract text
|
||||
# Extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
# Add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
# Parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# yield the last part
|
||||
# Yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
|
||||
class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
"""Parse the output of an LLM call to a comma-separated list."""
|
||||
"""Parse the output of a model to a comma-separated list."""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
)
|
||||
return [item for sublist in reader for item in sublist]
|
||||
except csv.Error:
|
||||
# keep old logic for backup
|
||||
# Keep old logic for backup
|
||||
return [part.strip() for part in text.split(",")]
|
||||
|
||||
@property
|
||||
|
||||
@@ -224,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Raises:
|
||||
@@ -307,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
|
||||
|
||||
|
||||
class StrOutputParser(BaseTransformOutputParser[str]):
|
||||
"""OutputParser that parses LLMResult into the top likely string."""
|
||||
"""OutputParser that parses `LLMResult` into the top likely string."""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""StrOutputParser is serializable.
|
||||
"""`StrOutputParser` is serializable.
|
||||
|
||||
Returns:
|
||||
True
|
||||
`True`
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
@@ -43,19 +43,19 @@ class _StreamingParser:
|
||||
"""Streaming parser for XML.
|
||||
|
||||
This implementation is pulled into a class to avoid implementation
|
||||
drift between transform and atransform of the XMLOutputParser.
|
||||
drift between transform and atransform of the `XMLOutputParser`.
|
||||
"""
|
||||
|
||||
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
|
||||
"""Initialize the streaming parser.
|
||||
|
||||
Args:
|
||||
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
See documentation in XMLOutputParser for more information.
|
||||
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
|
||||
`'xml'`. See documentation in `XMLOutputParser` for more information.
|
||||
|
||||
Raises:
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
"""
|
||||
if parser == "defusedxml":
|
||||
if not _HAS_DEFUSEDXML:
|
||||
@@ -79,10 +79,10 @@ class _StreamingParser:
|
||||
"""Parse a chunk of text.
|
||||
|
||||
Args:
|
||||
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
|
||||
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
|
||||
|
||||
Yields:
|
||||
A dictionary representing the parsed XML element.
|
||||
A `dict` representing the parsed XML element.
|
||||
|
||||
Raises:
|
||||
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
|
||||
@@ -147,46 +147,49 @@ class _StreamingParser:
|
||||
|
||||
|
||||
class XMLOutputParser(BaseTransformOutputParser):
|
||||
"""Parse an output using xml format."""
|
||||
"""Parse an output using xml format.
|
||||
|
||||
Returns a dictionary of tags.
|
||||
"""
|
||||
|
||||
tags: list[str] | None = None
|
||||
"""Tags to tell the LLM to expect in the XML output.
|
||||
|
||||
Note this may not be perfect depending on the LLM implementation.
|
||||
|
||||
For example, with tags=["foo", "bar", "baz"]:
|
||||
For example, with `tags=["foo", "bar", "baz"]`:
|
||||
|
||||
1. A well-formatted XML instance:
|
||||
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
|
||||
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
|
||||
|
||||
2. A badly-formatted XML instance (missing closing tag for 'bar'):
|
||||
"<foo>\n <bar>\n </foo>"
|
||||
`"<foo>\n <bar>\n </foo>"`
|
||||
|
||||
3. A badly-formatted XML instance (unexpected 'tag' element):
|
||||
"<foo>\n <tag>\n </tag>\n</foo>"
|
||||
`"<foo>\n <tag>\n </tag>\n</foo>"`
|
||||
"""
|
||||
encoding_matcher: re.Pattern = re.compile(
|
||||
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
|
||||
)
|
||||
parser: Literal["defusedxml", "xml"] = "defusedxml"
|
||||
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
|
||||
|
||||
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* 'xml' is the standard library parser.
|
||||
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* `'xml'` is the standard library parser.
|
||||
|
||||
Use `xml` only if you are sure that your distribution of the standard library
|
||||
is not vulnerable to XML vulnerabilities.
|
||||
Use `xml` only if you are sure that your distribution of the standard library is not
|
||||
vulnerable to XML vulnerabilities.
|
||||
|
||||
Please review the following resources for more information:
|
||||
|
||||
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
||||
* https://github.com/tiran/defusedxml
|
||||
|
||||
The standard library relies on libexpat for parsing XML:
|
||||
https://github.com/libexpat/libexpat
|
||||
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
|
||||
for parsing XML.
|
||||
"""
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
|
||||
text: The output of an LLM call.
|
||||
|
||||
Returns:
|
||||
A dictionary representing the parsed XML.
|
||||
A `dict` representing the parsed XML.
|
||||
|
||||
Raises:
|
||||
OutputParserException: If the XML is not well-formed.
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
"""
|
||||
# Try to find XML string within triple backticks
|
||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
|
||||
@@ -776,42 +776,41 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
|
||||
Use to create flexible templated prompts for chat models.
|
||||
|
||||
Examples:
|
||||
!!! warning "Behavior changed in 0.2.24"
|
||||
You can pass any Message-like formats supported by
|
||||
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
||||
init.
|
||||
!!! warning "Behavior changed in 0.2.24"
|
||||
You can pass any Message-like formats supported by
|
||||
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
||||
init.
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
|
||||
Messages Placeholder:
|
||||
!!! note "Messages Placeholder"
|
||||
|
||||
```python
|
||||
# In addition to Human/AI/Tool/Function messages,
|
||||
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
# )
|
||||
```
|
||||
|
||||
Single-variable template:
|
||||
!!! note "Single-variable template"
|
||||
|
||||
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
|
||||
and you invoke the template with a non-dict object, the prompt template will
|
||||
inject the provided argument into that variable location.
|
||||
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
|
||||
@@ -96,10 +96,10 @@ class RunLogPatch:
|
||||
"""Patch to the run log."""
|
||||
|
||||
ops: list[dict[str, Any]]
|
||||
"""List of jsonpatch operations, which describe how to create the run state
|
||||
"""List of JSONPatch operations, which describe how to create the run state
|
||||
from an empty dict. This is the minimal representation of the log, designed to
|
||||
be serialized as JSON and sent over the wire to reconstruct the log on the other
|
||||
side. Reconstruction of the state can be done with any jsonpatch-compliant library,
|
||||
side. Reconstruction of the state can be done with any JSONPatch-compliant library,
|
||||
see https://jsonpatch.com for more information."""
|
||||
|
||||
def __init__(self, *ops: dict[str, Any]) -> None:
|
||||
|
||||
@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
|
||||
permissions of the tools that underlie the given agent toolkit, and determine
|
||||
whether permissions of the given toolkit are appropriate for the application.
|
||||
|
||||
See [Security](https://docs.langchain.com/oss/python/security-policy) for more
|
||||
information.
|
||||
See https://docs.langchain.com/oss/python/security-policy for more information.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
and HuggingFace tokenizer based on the tiktoken_enabled flag.
|
||||
|
||||
Args:
|
||||
texts (List[str]): A list of texts to embed.
|
||||
engine (str): The engine or model to use for embeddings.
|
||||
chunk_size (int | None): The size of chunks for processing embeddings.
|
||||
texts: A list of texts to embed.
|
||||
engine: The engine or model to use for embeddings.
|
||||
chunk_size: The size of chunks for processing embeddings.
|
||||
|
||||
Returns:
|
||||
List[List[float]]: A list of embeddings for each input text.
|
||||
A list of embeddings for each input text.
|
||||
"""
|
||||
_chunk_size = chunk_size or self.chunk_size
|
||||
client_kwargs = {**self._invocation_params, **kwargs}
|
||||
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
|
||||
|
||||
Args:
|
||||
texts (List[str]): A list of texts to embed.
|
||||
engine (str): The engine or model to use for embeddings.
|
||||
chunk_size (int | None): The size of chunks for processing embeddings.
|
||||
texts: A list of texts to embed.
|
||||
engine: The engine or model to use for embeddings.
|
||||
chunk_size: The size of chunks for processing embeddings.
|
||||
|
||||
Returns:
|
||||
List[List[float]]: A list of embeddings for each input text.
|
||||
A list of embeddings for each input text.
|
||||
"""
|
||||
_chunk_size = chunk_size or self.chunk_size
|
||||
client_kwargs = {**self._invocation_params, **kwargs}
|
||||
|
||||
Reference in New Issue
Block a user