Merge branch 'master' into fix/azure_deepseek_structured_output

Ahmed Tammaa 2025-07-22 04:28:44 +03:00 committed by GitHub
commit b5b92a0967
11 changed files with 3051 additions and 2957 deletions

View File

@@ -25,7 +25,7 @@ def get_user(user_id: str, verbose: bool = False): # Maintains stable interface
 * Prefer descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
 * Break up overly long or deeply nested functions for **readability and maintainability**.
 * Avoid unnecessary abstraction or premature optimization.
-* All generated Python code must include type hints.
+* All generated Python code must include type hints and return types.
 Bad:

View File

@@ -340,7 +340,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        partner: [openai]
+        partner: [openai, anthropic]
      fail-fast: false # Continue testing other partners if one fails
     env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@@ -1029,7 +1029,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Chroma",
@@ -1042,7 +1042,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Clickhouse",
@@ -1055,7 +1055,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "CouchbaseSearchVectorStore",
@@ -1081,7 +1081,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "ElasticsearchStore",
@@ -1094,7 +1094,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "FAISS",
@@ -1107,7 +1107,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "InMemoryVectorStore",
@@ -1120,7 +1120,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Milvus",
@@ -1146,7 +1146,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "openGauss",
@@ -1172,7 +1172,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "PineconeVectorStore",
@@ -1185,7 +1185,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "QdrantVectorStore",
@@ -1211,7 +1211,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "Weaviate",
@@ -1224,7 +1224,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: true,
       local: true,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
     {
       name: "SQLServer",
@@ -1237,7 +1237,7 @@ const FEATURE_TABLES = {
       passesStandardTests: false,
       multiTenancy: false,
       local: false,
-      idsInAddDocuments: false,
+      idsInAddDocuments: true,
     },
   ],
}
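
The flag flipped throughout the hunks above documents that these vector stores accept caller-supplied IDs in `add_documents`, which makes re-ingestion idempotent. A minimal sketch of that capability, assuming the in-memory store and fake embeddings shipped with `langchain_core` (any store with the flag set would work the same way):

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
docs = [Document(page_content="hello"), Document(page_content="world")]

# Supplying ids means re-adding a document with the same id upserts it
# rather than creating a duplicate entry.
ids = store.add_documents(docs, ids=["doc-1", "doc-2"])
print(ids)  # ["doc-1", "doc-2"]
```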

View File

@@ -23,7 +23,12 @@ if TYPE_CHECKING:
     from langchain_core.utils.iter import batch_iterate
     from langchain_core.utils.loading import try_load_from_hub
     from langchain_core.utils.pydantic import pre_init
-    from langchain_core.utils.strings import comma_list, stringify_dict, stringify_value
+    from langchain_core.utils.strings import (
+        comma_list,
+        sanitize_for_postgres,
+        stringify_dict,
+        stringify_value,
+    )
     from langchain_core.utils.utils import (
         build_extra_kwargs,
         check_package_version,
@@ -59,6 +64,7 @@ __all__ = (
     "pre_init",
     "print_text",
     "raise_for_status_with_text",
+    "sanitize_for_postgres",
     "secret_from_env",
     "stringify_dict",
     "stringify_value",
@@ -81,6 +87,7 @@ _dynamic_imports = {
     "try_load_from_hub": "loading",
     "pre_init": "pydantic",
     "comma_list": "strings",
+    "sanitize_for_postgres": "strings",
     "stringify_dict": "strings",
     "stringify_value": "strings",
     "build_extra_kwargs": "utils",

View File

@@ -46,3 +46,26 @@ def comma_list(items: list[Any]) -> str:
         str: The comma-separated string.
     """
     return ", ".join(str(item) for item in items)
+
+
+def sanitize_for_postgres(text: str, replacement: str = "") -> str:
+    r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL.
+
+    PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause
+    psycopg.DataError when inserting documents. This function removes or replaces
+    such characters to ensure compatibility.
+
+    Args:
+        text: The text to sanitize.
+        replacement: String to replace NUL bytes with. Defaults to empty string.
+
+    Returns:
+        str: The sanitized text with NUL bytes removed or replaced.
+
+    Example:
+        >>> sanitize_for_postgres("Hello\\x00world")
+        'Helloworld'
+        >>> sanitize_for_postgres("Hello\\x00world", " ")
+        'Hello world'
+    """
+    return text.replace("\x00", replacement)
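
A quick usage sketch of the helper added above, imported through the new `langchain_core.utils` re-export (the sample strings are illustrative):

```python
from langchain_core.utils import sanitize_for_postgres

# NUL bytes commonly sneak in via scraped or OCR'd source text.
raw = "Hello\x00world"
print(sanitize_for_postgres(raw))       # "Helloworld" - NUL removed
print(sanitize_for_postgres(raw, " "))  # "Hello world" - NUL replaced
```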

View File

@@ -27,6 +27,7 @@ EXPECTED_ALL = [
     "pre_init",
     "from_env",
     "secret_from_env",
+    "sanitize_for_postgres",
 ]

View File

@@ -0,0 +1,49 @@
+"""Test string utilities."""
+
+from langchain_core.utils.strings import (
+    comma_list,
+    sanitize_for_postgres,
+    stringify_dict,
+    stringify_value,
+)
+
+
+def test_sanitize_for_postgres() -> None:
+    """Test sanitizing text for PostgreSQL compatibility."""
+    # Test with NUL bytes
+    text_with_nul = "Hello\x00world\x00test"
+    expected = "Helloworldtest"
+    assert sanitize_for_postgres(text_with_nul) == expected
+
+    # Test with replacement character
+    expected_with_replacement = "Hello world test"
+    assert sanitize_for_postgres(text_with_nul, " ") == expected_with_replacement
+
+    # Test with text without NUL bytes
+    clean_text = "Hello world"
+    assert sanitize_for_postgres(clean_text) == clean_text
+
+    # Test empty string
+    assert sanitize_for_postgres("") == ""
+
+    # Test with multiple consecutive NUL bytes
+    text_with_multiple_nuls = "Hello\x00\x00\x00world"
+    assert sanitize_for_postgres(text_with_multiple_nuls) == "Helloworld"
+    assert sanitize_for_postgres(text_with_multiple_nuls, "-") == "Hello---world"
+
+
+def test_existing_string_functions() -> None:
+    """Test existing string functions still work."""
+    # Test comma_list
+    assert comma_list([1, 2, 3]) == "1, 2, 3"
+    assert comma_list(["a", "b", "c"]) == "a, b, c"
+
+    # Test stringify_value
+    assert stringify_value("hello") == "hello"
+    assert stringify_value(42) == "42"
+
+    # Test stringify_dict
+    data = {"key": "value", "number": 123}
+    result = stringify_dict(data)
+    assert "key: value" in result
+    assert "number: 123" in result

View File

@@ -123,7 +123,10 @@ def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
         Chain (LLMChain) that can be used to answer questions with citations.
     """
     output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
-    schema = QuestionAnswer.schema()
+    if hasattr(QuestionAnswer, "model_json_schema"):
+        schema = QuestionAnswer.model_json_schema()
+    else:
+        schema = QuestionAnswer.schema()
     function = {
         "name": schema["title"],
         "description": schema["description"],

View File

@@ -70,8 +70,11 @@ class JsonSchemaEvaluator(StringEvaluator):
     def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
         if isinstance(node, str):
             return parse_json_markdown(node)
+        if hasattr(node, "model_json_schema") and callable(node.model_json_schema):
+            # Pydantic v2 model
+            return node.model_json_schema()
         if hasattr(node, "schema") and callable(node.schema):
-            # Pydantic model
+            # Pydantic v1 model
             return node.schema()
         return node

View File

@@ -43,7 +43,15 @@ class YamlOutputParser(BaseOutputParser[T]):
     def get_format_instructions(self) -> str:
         # Copy schema to avoid altering original Pydantic schema.
-        schema = dict(self.pydantic_object.schema().items())
+        if hasattr(self.pydantic_object, "model_json_schema"):
+            # Pydantic v2
+            schema = dict(self.pydantic_object.model_json_schema().items())
+        elif hasattr(self.pydantic_object, "schema"):
+            # Pydantic v1
+            schema = dict(self.pydantic_object.schema().items())
+        else:
+            msg = "Pydantic object must have either model_json_schema or schema method"
+            raise ValueError(msg)
         # Remove extraneous fields.
         reduced_schema = schema
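
The three hunks above apply the same compatibility dispatch: prefer Pydantic v2's `model_json_schema()` and fall back to v1's `.schema()`. A self-contained sketch of the pattern, with a hypothetical `Answer` model standing in for `QuestionAnswer` or `self.pydantic_object`:

```python
from pydantic import BaseModel


class Answer(BaseModel):  # hypothetical stand-in model
    """An answer extracted from a source text."""

    text: str


# Same dispatch as in the diffs: v2 accessor first, v1 fallback.
if hasattr(Answer, "model_json_schema"):
    schema = Answer.model_json_schema()  # Pydantic v2
else:
    schema = Answer.schema()  # Pydantic v1

print(schema["title"])  # "Answer"
```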

File diff suppressed because it is too large.