Merge branch 'master' into fix/azure_deepseek_structured_output

Ahmed Tammaa 2025-07-22 04:28:44 +03:00 committed by GitHub
commit b5b92a0967
11 changed files with 3051 additions and 2957 deletions

View File

@@ -25,7 +25,7 @@ def get_user(user_id: str, verbose: bool = False): # Maintains stable interface
 * Prefer descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
 * Break up overly long or deeply nested functions for **readability and maintainability**.
 * Avoid unnecessary abstraction or premature optimization.
-* All generated Python code must include type hints.
+* All generated Python code must include type hints and return types.
 Bad:

View File

@@ -340,7 +340,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        partner: [openai]
+        partner: [openai, anthropic]
      fail-fast: false # Continue testing other partners if one fails
     env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@@ -1029,7 +1029,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Chroma",
@@ -1042,7 +1042,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Clickhouse",
@@ -1055,7 +1055,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "CouchbaseSearchVectorStore",
@@ -1081,7 +1081,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "ElasticsearchStore",
@@ -1094,7 +1094,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "FAISS",
@@ -1107,7 +1107,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "InMemoryVectorStore",
@@ -1120,7 +1120,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Milvus",
@@ -1146,7 +1146,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "openGauss",
@@ -1172,7 +1172,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "PineconeVectorStore",
@@ -1185,7 +1185,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "QdrantVectorStore",
@@ -1211,7 +1211,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Weaviate",
@@ -1224,7 +1224,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: true,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "SQLServer",
@@ -1237,7 +1237,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
   ],
}
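
The flag flipped throughout the hunks above documents that these vector stores accept caller-supplied IDs in `add_documents`, which makes re-ingestion idempotent. A minimal sketch of that capability, assuming the in-memory store and fake embeddings shipped with `langchain_core` (any store with the flag set would work the same way):

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
docs = [Document(page_content="hello"), Document(page_content="world")]

# Supplying ids means re-adding a document with the same id upserts it
# rather than creating a duplicate entry.
ids = store.add_documents(docs, ids=["doc-1", "doc-2"])
print(ids)  # ["doc-1", "doc-2"]
```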

View File

@@ -23,7 +23,12 @@ if TYPE_CHECKING:
     from langchain_core.utils.iter import batch_iterate
     from langchain_core.utils.loading import try_load_from_hub
     from langchain_core.utils.pydantic import pre_init
-    from langchain_core.utils.strings import comma_list, stringify_dict, stringify_value
+    from langchain_core.utils.strings import (
+        comma_list,
+        sanitize_for_postgres,
+        stringify_dict,
+        stringify_value,
+    )
     from langchain_core.utils.utils import (
         build_extra_kwargs,
         check_package_version,
@@ -59,6 +64,7 @@ __all__ = (
     "pre_init",
     "print_text",
     "raise_for_status_with_text",
+    "sanitize_for_postgres",
     "secret_from_env",
     "stringify_dict",
     "stringify_value",
@@ -81,6 +87,7 @@ _dynamic_imports = {
     "try_load_from_hub": "loading",
     "pre_init": "pydantic",
     "comma_list": "strings",
+    "sanitize_for_postgres": "strings",
     "stringify_dict": "strings",
     "stringify_value": "strings",
     "build_extra_kwargs": "utils",

View File

@@ -46,3 +46,26 @@ def comma_list(items: list[Any]) -> str:
         str: The comma-separated string.
     """
     return ", ".join(str(item) for item in items)
+
+
+def sanitize_for_postgres(text: str, replacement: str = "") -> str:
+    r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL.
+
+    PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause
+    psycopg.DataError when inserting documents. This function removes or replaces
+    such characters to ensure compatibility.
+
+    Args:
+        text: The text to sanitize.
+        replacement: String to replace NUL bytes with. Defaults to empty string.
+
+    Returns:
+        str: The sanitized text with NUL bytes removed or replaced.
+
+    Example:
+        >>> sanitize_for_postgres("Hello\\x00world")
+        'Helloworld'
+        >>> sanitize_for_postgres("Hello\\x00world", " ")
+        'Hello world'
+    """
+    return text.replace("\x00", replacement)
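
A quick usage sketch of the helper added above, imported through the new `langchain_core.utils` re-export (the sample strings are illustrative):

```python
from langchain_core.utils import sanitize_for_postgres

# NUL bytes commonly sneak in via scraped or OCR'd source text.
raw = "Hello\x00world"
print(sanitize_for_postgres(raw))       # "Helloworld" - NUL removed
print(sanitize_for_postgres(raw, " "))  # "Hello world" - NUL replaced
```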

View File

@@ -27,6 +27,7 @@ EXPECTED_ALL = [
     "pre_init",
     "from_env",
     "secret_from_env",
+    "sanitize_for_postgres",
 ]

View File

@@ -0,0 +1,49 @@
+"""Test string utilities."""
+
+from langchain_core.utils.strings import (
+    comma_list,
+    sanitize_for_postgres,
+    stringify_dict,
+    stringify_value,
+)
+
+
+def test_sanitize_for_postgres() -> None:
+    """Test sanitizing text for PostgreSQL compatibility."""
+    # Test with NUL bytes
+    text_with_nul = "Hello\x00world\x00test"
+    expected = "Helloworldtest"
+    assert sanitize_for_postgres(text_with_nul) == expected
+
+    # Test with replacement character
+    expected_with_replacement = "Hello world test"
+    assert sanitize_for_postgres(text_with_nul, " ") == expected_with_replacement
+
+    # Test with text without NUL bytes
+    clean_text = "Hello world"
+    assert sanitize_for_postgres(clean_text) == clean_text
+
+    # Test empty string
+    assert sanitize_for_postgres("") == ""
+
+    # Test with multiple consecutive NUL bytes
+    text_with_multiple_nuls = "Hello\x00\x00\x00world"
+    assert sanitize_for_postgres(text_with_multiple_nuls) == "Helloworld"
+    assert sanitize_for_postgres(text_with_multiple_nuls, "-") == "Hello---world"
+
+
+def test_existing_string_functions() -> None:
+    """Test existing string functions still work."""
+    # Test comma_list
+    assert comma_list([1, 2, 3]) == "1, 2, 3"
+    assert comma_list(["a", "b", "c"]) == "a, b, c"
+
+    # Test stringify_value
+    assert stringify_value("hello") == "hello"
+    assert stringify_value(42) == "42"
+
+    # Test stringify_dict
+    data = {"key": "value", "number": 123}
+    result = stringify_dict(data)
+    assert "key: value" in result
+    assert "number: 123" in result

View File

@@ -123,7 +123,10 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
         Chain (LLMChain) that can be used to answer questions with citations.
     """
     output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
-    schema = QuestionAnswer.schema()
+    if hasattr(QuestionAnswer, "model_json_schema"):
+        schema = QuestionAnswer.model_json_schema()
+    else:
+        schema = QuestionAnswer.schema()
     function = {
         "name": schema["title"],
         "description": schema["description"],

View File

@@ -70,8 +70,11 @@ class JsonSchemaEvaluator(StringEvaluator):
     def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
         if isinstance(node, str):
             return parse_json_markdown(node)
+        if hasattr(node, "model_json_schema") and callable(node.model_json_schema):
+            # Pydantic v2 model
+            return node.model_json_schema()
         if hasattr(node, "schema") and callable(node.schema):
-            # Pydantic model
+            # Pydantic v1 model
             return node.schema()
         return node

View File

@@ -43,7 +43,15 @@ class YamlOutputParser(BaseOutputParser[T]):
     def get_format_instructions(self) -> str:
         # Copy schema to avoid altering original Pydantic schema.
-        schema = dict(self.pydantic_object.schema().items())
+        if hasattr(self.pydantic_object, "model_json_schema"):
+            # Pydantic v2
+            schema = dict(self.pydantic_object.model_json_schema().items())
+        elif hasattr(self.pydantic_object, "schema"):
+            # Pydantic v1
+            schema = dict(self.pydantic_object.schema().items())
+        else:
+            msg = "Pydantic object must have either model_json_schema or schema method"
+            raise ValueError(msg)
         # Remove extraneous fields.
         reduced_schema = schema
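
The three hunks above apply the same compatibility dispatch: prefer Pydantic v2's `model_json_schema()` and fall back to v1's `.schema()`. A self-contained sketch of the pattern, with a hypothetical `Answer` model standing in for `QuestionAnswer` or `self.pydantic_object`:

```python
from pydantic import BaseModel


class Answer(BaseModel):  # hypothetical stand-in model
    """An answer extracted from a source text."""

    text: str


# Same dispatch as in the diffs: v2 accessor first, v1 fallback.
if hasattr(Answer, "model_json_schema"):
    schema = Answer.model_json_schema()  # Pydantic v2
else:
    schema = Answer.schema()  # Pydantic v1

print(schema["title"])  # "Answer"
```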

File diff suppressed because it is too large.