Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-28 05:54:55 +00:00)

Merge branch 'master' into fix/azure_deepseek_structured_output

This commit is contained in: commit b5b92a0967
.github/copilot-instructions.md (vendored, 2 lines changed)
@@ -25,7 +25,7 @@ def get_user(user_id: str, verbose: bool = False):  # Maintains stable interface
 * Prefer descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
 * Break up overly long or deeply nested functions for **readability and maintainability**.
 * Avoid unnecessary abstraction or premature optimization.
-* All generated Python code must include type hints.
+* All generated Python code must include type hints and return types.

 Bad:
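A minimal sketch of what the amended rule asks for; the function and lookup table here are illustrative, not from the repository:

```python
# Illustrative only: every parameter and the return value are annotated.
USER_NAMES: dict[str, str] = {"u1": "Ada", "u2": "Grace"}


def get_user_name(user_id: str, verbose: bool = False) -> str:
    """Return the display name for a user id, or "unknown" if absent."""
    name = USER_NAMES.get(user_id, "unknown")
    if verbose:
        print(f"Resolved {user_id} -> {name}")
    return name
```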
.github/workflows/_release.yml (vendored, 2 lines changed)
@@ -340,7 +340,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        partner: [openai]
+        partner: [openai, anthropic]
       fail-fast: false # Continue testing other partners if one fails
     env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -1029,7 +1029,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Chroma",
@@ -1042,7 +1042,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Clickhouse",
@@ -1055,7 +1055,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "CouchbaseSearchVectorStore",
@@ -1081,7 +1081,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "ElasticsearchStore",
@@ -1094,7 +1094,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "FAISS",
@@ -1107,7 +1107,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "InMemoryVectorStore",
@@ -1120,7 +1120,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Milvus",
@@ -1146,7 +1146,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "openGauss",
@@ -1172,7 +1172,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "PineconeVectorStore",
@@ -1185,7 +1185,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "QdrantVectorStore",
@@ -1211,7 +1211,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Weaviate",
@@ -1224,7 +1224,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: true,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "SQLServer",
@@ -1237,7 +1237,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
   ],
 }
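The field these hunks flip, idsInAddDocuments, records whether a store accepts caller-supplied ids in add_documents. A minimal sketch of that usage, using InMemoryVectorStore with a fake embedding so it runs standalone (the ids shown are made up):

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
docs = [Document(page_content="hello"), Document(page_content="world")]

# Caller-supplied ids make re-ingestion idempotent: adding again with the
# same id overwrites the earlier entry instead of duplicating it.
ids = store.add_documents(docs, ids=["doc-1", "doc-2"])
print(ids)  # -> ['doc-1', 'doc-2']
```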
@@ -23,7 +23,12 @@ if TYPE_CHECKING:
     from langchain_core.utils.iter import batch_iterate
     from langchain_core.utils.loading import try_load_from_hub
     from langchain_core.utils.pydantic import pre_init
-    from langchain_core.utils.strings import comma_list, stringify_dict, stringify_value
+    from langchain_core.utils.strings import (
+        comma_list,
+        sanitize_for_postgres,
+        stringify_dict,
+        stringify_value,
+    )
     from langchain_core.utils.utils import (
         build_extra_kwargs,
         check_package_version,
@@ -59,6 +64,7 @@ __all__ = (
     "pre_init",
     "print_text",
     "raise_for_status_with_text",
+    "sanitize_for_postgres",
     "secret_from_env",
     "stringify_dict",
     "stringify_value",
@@ -81,6 +87,7 @@ _dynamic_imports = {
     "try_load_from_hub": "loading",
     "pre_init": "pydantic",
     "comma_list": "strings",
+    "sanitize_for_postgres": "strings",
     "stringify_dict": "strings",
     "stringify_value": "strings",
     "build_extra_kwargs": "utils",
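The _dynamic_imports table feeds a lazy-import hook. A simplified sketch of the usual PEP 562 pattern behind such a table (our approximation, not the repository's exact code):

```python
import importlib
from typing import Any

# Attribute name -> defining submodule (abbreviated from the table above).
_dynamic_imports = {
    "comma_list": "strings",
    "sanitize_for_postgres": "strings",
}


def __getattr__(name: str) -> Any:
    """Module-level __getattr__ (PEP 562): import submodules on first access."""
    module_name = _dynamic_imports.get(name)
    if module_name is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    module = importlib.import_module(f".{module_name}", package=__package__)
    return getattr(module, name)
```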
@@ -46,3 +46,26 @@ def comma_list(items: list[Any]) -> str:
         str: The comma-separated string.
     """
     return ", ".join(str(item) for item in items)
+
+
+def sanitize_for_postgres(text: str, replacement: str = "") -> str:
+    r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL.
+
+    PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause
+    psycopg.DataError when inserting documents. This function removes or replaces
+    such characters to ensure compatibility.
+
+    Args:
+        text: The text to sanitize.
+        replacement: String to replace NUL bytes with. Defaults to empty string.
+
+    Returns:
+        str: The sanitized text with NUL bytes removed or replaced.
+
+    Example:
+        >>> sanitize_for_postgres("Hello\x00world")
+        'Helloworld'
+        >>> sanitize_for_postgres("Hello\x00world", " ")
+        'Hello world'
+    """
+    return text.replace("\x00", replacement)
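A hedged sketch of where the new helper sits in an ingestion path; the table name and connection handling are hypothetical, and psycopg 3 is assumed:

```python
import psycopg  # psycopg 3, assumed installed

from langchain_core.utils.strings import sanitize_for_postgres


def insert_contents(conn: psycopg.Connection, contents: list[str]) -> None:
    """Strip NUL bytes before writing so inserts don't raise psycopg.DataError."""
    with conn.cursor() as cur:
        for content in contents:
            cur.execute(
                "INSERT INTO documents (content) VALUES (%s)",  # hypothetical table
                (sanitize_for_postgres(content),),
            )
    conn.commit()
```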
@@ -27,6 +27,7 @@ EXPECTED_ALL = [
     "pre_init",
     "from_env",
     "secret_from_env",
+    "sanitize_for_postgres",
 ]
libs/core/tests/unit_tests/utils/test_strings.py (new file, 49 lines)
@@ -0,0 +1,49 @@
+"""Test string utilities."""
+
+from langchain_core.utils.strings import (
+    comma_list,
+    sanitize_for_postgres,
+    stringify_dict,
+    stringify_value,
+)
+
+
+def test_sanitize_for_postgres() -> None:
+    """Test sanitizing text for PostgreSQL compatibility."""
+    # Test with NUL bytes
+    text_with_nul = "Hello\x00world\x00test"
+    expected = "Helloworldtest"
+    assert sanitize_for_postgres(text_with_nul) == expected
+
+    # Test with replacement character
+    expected_with_replacement = "Hello world test"
+    assert sanitize_for_postgres(text_with_nul, " ") == expected_with_replacement
+
+    # Test with text without NUL bytes
+    clean_text = "Hello world"
+    assert sanitize_for_postgres(clean_text) == clean_text
+
+    # Test empty string
+    assert sanitize_for_postgres("") == ""
+
+    # Test with multiple consecutive NUL bytes
+    text_with_multiple_nuls = "Hello\x00\x00\x00world"
+    assert sanitize_for_postgres(text_with_multiple_nuls) == "Helloworld"
+    assert sanitize_for_postgres(text_with_multiple_nuls, "-") == "Hello---world"
+
+
+def test_existing_string_functions() -> None:
+    """Test existing string functions still work."""
+    # Test comma_list
+    assert comma_list([1, 2, 3]) == "1, 2, 3"
+    assert comma_list(["a", "b", "c"]) == "a, b, c"
+
+    # Test stringify_value
+    assert stringify_value("hello") == "hello"
+    assert stringify_value(42) == "42"
+
+    # Test stringify_dict
+    data = {"key": "value", "number": 123}
+    result = stringify_dict(data)
+    assert "key: value" in result
+    assert "number: 123" in result
@@ -123,7 +123,10 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
         Chain (LLMChain) that can be used to answer questions with citations.
     """
     output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
-    schema = QuestionAnswer.schema()
+    if hasattr(QuestionAnswer, "model_json_schema"):
+        schema = QuestionAnswer.model_json_schema()
+    else:
+        schema = QuestionAnswer.schema()
     function = {
         "name": schema["title"],
         "description": schema["description"],
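This and the next two hunks apply one compatibility pattern: try Pydantic v2's model_json_schema() first, then fall back to v1's schema(). A standalone sketch of that pattern (the helper name is ours; the repository inlines the check at each call site):

```python
from typing import Any


def get_json_schema(model_cls: Any) -> dict:
    """Return a JSON schema dict from a Pydantic v2 or v1 model class."""
    # v2 is checked first: v2 models also keep a deprecated .schema(),
    # so testing .schema() first would hit the deprecated path.
    if hasattr(model_cls, "model_json_schema"):
        return model_cls.model_json_schema()  # Pydantic v2
    if hasattr(model_cls, "schema"):
        return model_cls.schema()  # Pydantic v1
    msg = "Object must define model_json_schema() or schema()"
    raise TypeError(msg)
```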
@@ -70,8 +70,11 @@ class JsonSchemaEvaluator(StringEvaluator):
     def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
         if isinstance(node, str):
             return parse_json_markdown(node)
+        if hasattr(node, "model_json_schema") and callable(node.model_json_schema):
+            # Pydantic v2 model
+            return node.model_json_schema()
         if hasattr(node, "schema") and callable(node.schema):
-            # Pydantic model
+            # Pydantic v1 model
             return node.schema()
         return node
@@ -43,7 +43,15 @@ class YamlOutputParser(BaseOutputParser[T]):

     def get_format_instructions(self) -> str:
         # Copy schema to avoid altering original Pydantic schema.
-        schema = dict(self.pydantic_object.schema().items())
+        if hasattr(self.pydantic_object, "model_json_schema"):
+            # Pydantic v2
+            schema = dict(self.pydantic_object.model_json_schema().items())
+        elif hasattr(self.pydantic_object, "schema"):
+            # Pydantic v1
+            schema = dict(self.pydantic_object.schema().items())
+        else:
+            msg = "Pydantic object must have either model_json_schema or schema method"
+            raise ValueError(msg)

         # Remove extraneous fields.
         reduced_schema = schema
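A quick usage check for the parser fix, assuming langchain's YamlOutputParser import path and a Pydantic v2 model:

```python
from langchain.output_parsers.yaml import YamlOutputParser
from pydantic import BaseModel


class Joke(BaseModel):
    """Toy schema for illustration."""

    setup: str
    punchline: str


parser = YamlOutputParser(pydantic_object=Joke)
# With the fix, this prefers model_json_schema() on v2 models instead of
# falling through to the deprecated v1-style .schema().
print(parser.get_format_instructions())
```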
(One file's diff is suppressed because it is too large.)