From f203229b513aa0f04704388081a501a7d8773eeb Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 24 Oct 2024 09:17:10 +0800 Subject: [PATCH 1/7] community: Fix the failure of ChatSparkLLM after upgrading to Pydantic V2 (#27418) **Description:** The test in test_sparkllm.py reproduces this issue. https://github.com/langchain-ai/langchain/blob/master/libs/community/tests/integration_tests/chat_models/test_sparkllm.py#L66 ``` Testing started at 18:27 ... Launching pytest with arguments test_sparkllm.py::test_chat_spark_llm --no-header --no-summary -q in /Users/zhanglei/Work/github/langchain/libs/community/tests/integration_tests/chat_models ============================= test session starts ============================== collecting ... collected 1 item test_sparkllm.py::test_chat_spark_llm ============================== 1 failed in 0.45s =============================== FAILED [100%] tests/integration_tests/chat_models/test_sparkllm.py:65 (test_chat_spark_llm) def test_chat_spark_llm() -> None: > chat = ChatSparkLLM( spark_app_id="your spark_app_id", spark_api_key="your spark_api_key", spark_api_secret="your spark_api_secret", ) # type: ignore[call-arg] test_sparkllm.py:67: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ../../../../core/langchain_core/load/serializable.py:111: in __init__ super().__init__(*args, **kwargs) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ cls = values = {'spark_api_key': 'your spark_api_key', 'spark_api_secret': 'your spark_api_secret', 'spark_api_url': 'wss://spark-api.xf-yun.com/v3.5/chat', 'spark_app_id': 'your spark_app_id', ...} @model_validator(mode="before") @classmethod def validate_environment(cls, values: Dict) -> Any: values["spark_app_id"] = get_from_dict_or_env( values, ["spark_app_id", "app_id"], "IFLYTEK_SPARK_APP_ID", ) values["spark_api_key"] = get_from_dict_or_env( values, ["spark_api_key", "api_key"], "IFLYTEK_SPARK_API_KEY", ) values["spark_api_secret"] = get_from_dict_or_env( values, ["spark_api_secret", "api_secret"], "IFLYTEK_SPARK_API_SECRET", ) values["spark_api_url"] = get_from_dict_or_env( values, "spark_api_url", "IFLYTEK_SPARK_API_URL", SPARK_API_URL, ) values["spark_llm_domain"] = get_from_dict_or_env( values, "spark_llm_domain", "IFLYTEK_SPARK_LLM_DOMAIN", SPARK_LLM_DOMAIN, ) # put extra params into model_kwargs default_values = { name: field.default for name, field in get_fields(cls).items() if field.default is not None } > values["model_kwargs"]["temperature"] = default_values.get("temperature") E KeyError: 'model_kwargs' ../../../langchain_community/chat_models/sparkllm.py:368: KeyError ``` I found that when upgrading to Pydantic v2, @root_validator was changed to @model_validator. When a class declares multiple @model_validator(mode="before") validators, their execution order in V2 is the reverse of V1. This is the reason for ChatSparkLLM's failure. The correct order is to execute build_extra first, https://github.com/langchain-ai/langchain/blob/langchain%3D%3D0.2.16/libs/community/langchain_community/chat_models/sparkllm.py#L302 and then validate_environment. https://github.com/langchain-ai/langchain/blob/langchain%3D%3D0.2.16/libs/community/langchain_community/chat_models/sparkllm.py#L329 The Pydantic community has also discussed this behavior, but no conclusion has been reached yet:
https://github.com/pydantic/pydantic/discussions/7434
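As a minimal sketch of the inversion (illustrative only, not part of this patch; the class and validator names are made up), two `mode="before"` validators declared on one Pydantic v2 model fire in the reverse of their declaration order:

```
from pydantic import BaseModel, model_validator


class Demo(BaseModel):
    x: int = 0

    @model_validator(mode="before")
    @classmethod
    def declared_first(cls, values: dict) -> dict:
        print("declared_first")
        return values

    @model_validator(mode="before")
    @classmethod
    def declared_second(cls, values: dict) -> dict:
        print("declared_second")
        return values


# Under Pydantic v2 this prints "declared_second" before "declared_first".
# That inversion is why validate_environment ran before build_extra and
# found no "model_kwargs" key in values.
Demo(x=1)
```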
**Issue:** #27416 **Twitter handle:** coolbeevip --------- Co-authored-by: vbarda --- libs/community/extended_testing_deps.txt | 1 + .../chat_models/sparkllm.py | 60 ++++++++++--------- .../unit_tests/chat_models/test_sparkllm.py | 24 ++++++++ 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt index b2548b22193..56caca04381 100644 --- a/libs/community/extended_testing_deps.txt +++ b/libs/community/extended_testing_deps.txt @@ -95,3 +95,4 @@ xmltodict>=0.13.0,<0.14 nanopq==0.2.1 mlflow[genai]>=2.14.0 databricks-sdk>=0.30.0 +websocket>=0.2.1,<1 \ No newline at end of file diff --git a/libs/community/langchain_community/chat_models/sparkllm.py b/libs/community/langchain_community/chat_models/sparkllm.py index 996c8c2a2f6..3b7d1d47d29 100644 --- a/libs/community/langchain_community/chat_models/sparkllm.py +++ b/libs/community/langchain_community/chat_models/sparkllm.py @@ -300,34 +300,6 @@ class ChatSparkLLM(BaseChatModel): populate_by_name=True, ) - @model_validator(mode="before") - @classmethod - def build_extra(cls, values: Dict[str, Any]) -> Any: - """Build extra kwargs from additional params that were passed in.""" - all_required_field_names = get_pydantic_field_names(cls) - extra = values.get("model_kwargs", {}) - for field_name in list(values): - if field_name in extra: - raise ValueError(f"Found {field_name} supplied twice.") - if field_name not in all_required_field_names: - logger.warning( - f"""WARNING! {field_name} is not default parameter. - {field_name} was transferred to model_kwargs. - Please confirm that {field_name} is what you intended.""" - ) - extra[field_name] = values.pop(field_name) - - invalid_model_kwargs = all_required_field_names.intersection(extra.keys()) - if invalid_model_kwargs: - raise ValueError( - f"Parameters {invalid_model_kwargs} should be specified explicitly. " - f"Instead they were passed in as part of `model_kwargs` parameter." - ) - - values["model_kwargs"] = extra - - return values - @model_validator(mode="before") @classmethod def validate_environment(cls, values: Dict) -> Any: @@ -378,6 +350,38 @@ class ChatSparkLLM(BaseChatModel): ) return values + # When using Pydantic V2 + # The execution order of multiple @model_validator decorators is opposite to + # their declaration order. https://github.com/pydantic/pydantic/discussions/7434 + + @model_validator(mode="before") + @classmethod + def build_extra(cls, values: Dict[str, Any]) -> Any: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + if field_name not in all_required_field_names: + logger.warning( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transferred to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + + invalid_model_kwargs = all_required_field_names.intersection(extra.keys()) + if invalid_model_kwargs: + raise ValueError( + f"Parameters {invalid_model_kwargs} should be specified explicitly. " + f"Instead they were passed in as part of `model_kwargs` parameter."
+ ) + + values["model_kwargs"] = extra + + return values + def _stream( self, messages: List[BaseMessage], diff --git a/libs/community/tests/unit_tests/chat_models/test_sparkllm.py b/libs/community/tests/unit_tests/chat_models/test_sparkllm.py index 6d7e4cf6aa8..f4c768a75d2 100644 --- a/libs/community/tests/unit_tests/chat_models/test_sparkllm.py +++ b/libs/community/tests/unit_tests/chat_models/test_sparkllm.py @@ -1,3 +1,4 @@ +import pytest from langchain_core.messages import ( AIMessage, HumanMessage, @@ -8,6 +9,7 @@ from langchain_core.output_parsers.openai_tools import ( ) from langchain_community.chat_models.sparkllm import ( + ChatSparkLLM, convert_dict_to_message, convert_message_to_dict, ) @@ -83,3 +85,25 @@ def test__convert_message_to_dict_system() -> None: result = convert_message_to_dict(message) expected_output = {"role": "system", "content": "foo"} assert result == expected_output + + +@pytest.mark.requires("websocket") +def test__chat_spark_llm_initialization() -> None: + chat = ChatSparkLLM( + app_id="IFLYTEK_SPARK_APP_ID", + api_key="IFLYTEK_SPARK_API_KEY", + api_secret="IFLYTEK_SPARK_API_SECRET", + api_url="IFLYTEK_SPARK_API_URL", + model="IFLYTEK_SPARK_LLM_DOMAIN", + timeout=40, + temperature=0.1, + top_k=3, + ) + assert chat.spark_app_id == "IFLYTEK_SPARK_APP_ID" + assert chat.spark_api_key == "IFLYTEK_SPARK_API_KEY" + assert chat.spark_api_secret == "IFLYTEK_SPARK_API_SECRET" + assert chat.spark_api_url == "IFLYTEK_SPARK_API_URL" + assert chat.spark_llm_domain == "IFLYTEK_SPARK_LLM_DOMAIN" + assert chat.request_timeout == 40 + assert chat.temperature == 0.1 + assert chat.top_k == 3 From d081a5400ab748a952b67a19f1d17758a215ebcf Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Wed, 23 Oct 2024 21:26:38 -0400 Subject: [PATCH 2/7] docs: fix more links (#27598) Fix more links --- docs/docs/how_to/agent_executor.ipynb | 2 +- .../how_to/example_selectors_langsmith.ipynb | 2 +- docs/docs/how_to/few_shot_examples.ipynb | 2 +- docs/docs/how_to/few_shot_examples_chat.ipynb | 2 +- docs/docs/how_to/index.mdx | 2 +- docs/docs/integrations/retrievers/index.mdx | 2 +- docs/docs/integrations/vectorstores/index.mdx | 2 +- docs/docs/tutorials/llm_chain.ipynb | 2 +- docs/docs/tutorials/local_rag.ipynb | 2 +- docs/docs/tutorials/pdf_qa.ipynb | 4 +- docs/docs/tutorials/qa_chat_history.ipynb | 2 +- docs/docs/tutorials/query_analysis.ipynb | 2 +- docs/docs/tutorials/rag.ipynb | 2 +- docs/scripts/append_related_links.py | 82 +++++++++++-------- 14 files changed, 61 insertions(+), 49 deletions(-) diff --git a/docs/docs/how_to/agent_executor.ipynb b/docs/docs/how_to/agent_executor.ipynb index 995b631f17b..647e4c6a117 100644 --- a/docs/docs/how_to/agent_executor.ipynb +++ b/docs/docs/how_to/agent_executor.ipynb @@ -35,7 +35,7 @@ "- Creating a [Retriever](/docs/concepts/retrievers) to expose specific information to our agent\n", "- Using a Search [Tool](/docs/concepts/tools) to look up things online\n", "- [`Chat History`](/docs/concepts/chat_history), which allows a chatbot to \"remember\" past interactions and take them into account when responding to follow-up questions. 
\n", - "- Debugging and tracing your application using [LangSmith](/docs/concepts/#langsmith)\n", + "- Debugging and tracing your application using [LangSmith](https://docs.smith.langchain.com/)\n", "\n", "## Setup\n", "\n", diff --git a/docs/docs/how_to/example_selectors_langsmith.ipynb b/docs/docs/how_to/example_selectors_langsmith.ipynb index 2c59ed313cd..efc9e2db46d 100644 --- a/docs/docs/how_to/example_selectors_langsmith.ipynb +++ b/docs/docs/how_to/example_selectors_langsmith.ipynb @@ -13,7 +13,7 @@ "\n", "\n", "\n", diff --git a/docs/docs/how_to/few_shot_examples.ipynb b/docs/docs/how_to/few_shot_examples.ipynb index d583be400ab..6c8d0926f03 100644 --- a/docs/docs/how_to/few_shot_examples.ipynb +++ b/docs/docs/how_to/few_shot_examples.ipynb @@ -23,7 +23,7 @@ "- [Prompt templates](/docs/concepts/prompt_templates)\n", "- [Example selectors](/docs/concepts/example_selectors)\n", "- [LLMs](/docs/concepts/text_llms)\n", - "- [Vectorstores](/docs/concepts/#vector-stores)\n", + "- [Vectorstores](/docs/concepts/vectorstores)\n", "\n", ":::\n", "\n", diff --git a/docs/docs/how_to/few_shot_examples_chat.ipynb b/docs/docs/how_to/few_shot_examples_chat.ipynb index 1192a211f11..51e41f65e40 100644 --- a/docs/docs/how_to/few_shot_examples_chat.ipynb +++ b/docs/docs/how_to/few_shot_examples_chat.ipynb @@ -23,7 +23,7 @@ "- [Prompt templates](/docs/concepts/prompt_templates)\n", "- [Example selectors](/docs/concepts/example_selectors)\n", "- [Chat models](/docs/concepts/chat_models)\n", - "- [Vectorstores](/docs/concepts/#vector-stores)\n", + "- [Vectorstores](/docs/concepts/vectorstores)\n", "\n", ":::\n", "\n", diff --git a/docs/docs/how_to/index.mdx b/docs/docs/how_to/index.mdx index 3ca54fe42a5..7e1b0535f09 100644 --- a/docs/docs/how_to/index.mdx +++ b/docs/docs/how_to/index.mdx @@ -159,7 +159,7 @@ What LangChain calls [LLMs](/docs/concepts/text_llms) are older forms of languag ### Vector stores -[Vector stores](/docs/concepts/#vector-stores) are databases that can efficiently store and retrieve embeddings. +[Vector stores](/docs/concepts/vectorstores) are databases that can efficiently store and retrieve embeddings. - [How to: use a vector store to retrieve data](/docs/how_to/vectorstores) diff --git a/docs/docs/integrations/retrievers/index.mdx b/docs/docs/integrations/retrievers/index.mdx index dee97cbb13e..4bbc458c563 100644 --- a/docs/docs/integrations/retrievers/index.mdx +++ b/docs/docs/integrations/retrievers/index.mdx @@ -16,7 +16,7 @@ Retrievers accept a string query as input and return a list of [Documents](https For specifics on how to use retrievers, see the [relevant how-to guides here](/docs/how_to/#retrievers). -Note that all [vector stores](/docs/concepts/#vector-stores) can be [cast to retrievers](/docs/how_to/vectorstore_retriever/). +Note that all [vector stores](/docs/concepts/vectorstores) can be [cast to retrievers](/docs/how_to/vectorstore_retriever/). Refer to the vector store [integration docs](/docs/integrations/vectorstores/) for available vector stores. This page lists custom retrievers, implemented via subclassing [BaseRetriever](/docs/how_to/custom_retriever/). 
diff --git a/docs/docs/integrations/vectorstores/index.mdx b/docs/docs/integrations/vectorstores/index.mdx index d6631ce9457..8b276057dad 100644 --- a/docs/docs/integrations/vectorstores/index.mdx +++ b/docs/docs/integrations/vectorstores/index.mdx @@ -7,7 +7,7 @@ sidebar_class_name: hidden import { CategoryTable, IndexTable } from "@theme/FeatureTables"; -A [vector store](/docs/concepts/#vector-stores) stores [embedded](/docs/concepts/embedding_models) data and performs similarity search. +A [vector store](/docs/concepts/vectorstores) stores [embedded](/docs/concepts/embedding_models) data and performs similarity search. diff --git a/docs/docs/tutorials/llm_chain.ipynb b/docs/docs/tutorials/llm_chain.ipynb index d9bd9500bce..0a04d91876f 100644 --- a/docs/docs/tutorials/llm_chain.ipynb +++ b/docs/docs/tutorials/llm_chain.ipynb @@ -27,7 +27,7 @@ "\n", "- Using [LangChain Expression Language (LCEL)](/docs/concepts/lcel) to chain components together\n", "\n", - "- Debugging and tracing your application using [LangSmith](/docs/concepts/#langsmith)\n", + "- Debugging and tracing your application using [LangSmith](https://docs.smith.langchain.com/)\n", "\n", "- Deploying your application with [LangServe](/docs/concepts/#langserve)\n", "\n", diff --git a/docs/docs/tutorials/local_rag.ipynb b/docs/docs/tutorials/local_rag.ipynb index d98b07732a5..a43f764d4c1 100644 --- a/docs/docs/tutorials/local_rag.ipynb +++ b/docs/docs/tutorials/local_rag.ipynb @@ -14,7 +14,7 @@ "- [Chat Models](/docs/concepts/chat_models)\n", "- [Chaining runnables](/docs/how_to/sequence/)\n", "- [Embeddings](/docs/concepts/embedding_models)\n", - "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Vector stores](/docs/concepts/vectorstores)\n", "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", "\n", ":::\n", diff --git a/docs/docs/tutorials/pdf_qa.ipynb b/docs/docs/tutorials/pdf_qa.ipynb index d5af436708a..bdc5792eac8 100644 --- a/docs/docs/tutorials/pdf_qa.ipynb +++ b/docs/docs/tutorials/pdf_qa.ipynb @@ -26,7 +26,7 @@ "- [Document loaders](/docs/concepts/document_loaders)\n", "- [Chat models](/docs/concepts/chat_models)\n", "- [Embeddings](/docs/concepts/embedding_models)\n", - "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Vector stores](/docs/concepts/vectorstores)\n", "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", "\n", ":::\n", @@ -117,7 +117,7 @@ "\n", "## Question answering with RAG\n", "\n", - "Next, you'll prepare the loaded documents for later retrieval. Using a [text splitter](/docs/concepts/text_splitters), you'll split your loaded documents into smaller documents that can more easily fit into an LLM's context window, then load them into a [vector store](/docs/concepts/#vector-stores). You can then create a [retriever](/docs/concepts/retrievers) from the vector store for use in our RAG chain:\n", + "Next, you'll prepare the loaded documents for later retrieval. Using a [text splitter](/docs/concepts/text_splitters), you'll split your loaded documents into smaller documents that can more easily fit into an LLM's context window, then load them into a [vector store](/docs/concepts/vectorstores). 
You can then create a [retriever](/docs/concepts/retrievers) from the vector store for use in our RAG chain:\n", "\n", "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", "\n", diff --git a/docs/docs/tutorials/qa_chat_history.ipynb b/docs/docs/tutorials/qa_chat_history.ipynb index a74959d05b3..8aff4c3a014 100644 --- a/docs/docs/tutorials/qa_chat_history.ipynb +++ b/docs/docs/tutorials/qa_chat_history.ipynb @@ -24,7 +24,7 @@ "- [Chat history](/docs/concepts/chat_history)\n", "- [Chat models](/docs/concepts/chat_models)\n", "- [Embeddings](/docs/concepts/embedding_models)\n", - "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Vector stores](/docs/concepts/vectorstores)\n", "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", "- [Tools](/docs/concepts/tools)\n", "- [Agents](/docs/concepts/agents)\n", diff --git a/docs/docs/tutorials/query_analysis.ipynb b/docs/docs/tutorials/query_analysis.ipynb index 102febcf1aa..c5bf42f4ce8 100644 --- a/docs/docs/tutorials/query_analysis.ipynb +++ b/docs/docs/tutorials/query_analysis.ipynb @@ -24,7 +24,7 @@ "- [Document loaders](/docs/concepts/document_loaders)\n", "- [Chat models](/docs/concepts/chat_models)\n", "- [Embeddings](/docs/concepts/embedding_models)\n", - "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Vector stores](/docs/concepts/vectorstores)\n", "- [Retrieval](/docs/concepts/retrieval)\n", "\n", ":::\n", diff --git a/docs/docs/tutorials/rag.ipynb b/docs/docs/tutorials/rag.ipynb index dd2299346df..8eb07fbb5cf 100644 --- a/docs/docs/tutorials/rag.ipynb +++ b/docs/docs/tutorials/rag.ipynb @@ -41,7 +41,7 @@ "### Indexing\n", "1. **Load**: First we need to load our data. This is done with [Document Loaders](/docs/concepts/document_loaders).\n", "2. **Split**: [Text splitters](/docs/concepts/text_splitters) break large `Documents` into smaller chunks. This is useful both for indexing data and for passing it in to a model, since large chunks are harder to search over and won't fit in a model's finite context window.\n", - "3. **Store**: We need somewhere to store and index our splits, so that they can later be searched over. This is often done using a [VectorStore](/docs/concepts/#vector-stores) and [Embeddings](/docs/concepts/embedding_models) model.\n", + "3. **Store**: We need somewhere to store and index our splits, so that they can later be searched over. 
This is often done using a [VectorStore](/docs/concepts/vectorstores) and [Embeddings](/docs/concepts/embedding_models) model.\n", "\n", "![index_diagram](../../static/img/rag_indexing.png)\n", "\n", diff --git a/docs/scripts/append_related_links.py b/docs/scripts/append_related_links.py index 49746bcc9e3..9689b432dca 100644 --- a/docs/scripts/append_related_links.py +++ b/docs/scripts/append_related_links.py @@ -3,47 +3,59 @@ import multiprocessing import re import sys from pathlib import Path +from typing import Optional +# List of 4-tuples (integration_name, display_name, concept_page, how_to_fragment) +INTEGRATION_INFO = [ + ("chat", "Chat model", "chat_models", "chat-models"), + ("llms", "LLM", "text_llms", "llms"), + ("text_embedding", "Embedding model", "embedding_models", "embedding-models"), + ("document_loaders", "Document loader", "document_loaders", "document-loaders"), + ("vectorstores", "Vector store", "vectorstores", "vector-stores"), + ("retrievers", "Retriever", "retrievers", "retrievers"), + ("tools", "Tool", "tools", "tools"), + # stores is a special case because there are no key-value store how-tos yet + # this is a placeholder for when we do have them + # for now the related links section will only contain the conceptual guide. + ("stores", "Key-value store", "key_value_stores", "key-value-stores"), +] -def _generate_related_links_section(integration_type: str, notebook_name: str): - concept_display_name = None - concept_heading = None - if integration_type == "chat": - concept_display_name = "Chat model" - concept_heading = "chat-models" - elif integration_type == "llms": - concept_display_name = "LLM" - concept_heading = "llms" - elif integration_type == "text_embedding": - concept_display_name = "Embedding model" - concept_heading = "embedding-models" - elif integration_type == "document_loaders": - concept_display_name = "Document loader" - concept_heading = "document-loaders" - elif integration_type == "vectorstores": - concept_display_name = "Vector store" - concept_heading = "vector-stores" - elif integration_type == "retrievers": - concept_display_name = "Retriever" - concept_heading = "retrievers" - elif integration_type == "tools": - concept_display_name = "Tool" - concept_heading = "tools" - elif integration_type == "stores": - concept_display_name = "Key-value store" - concept_heading = "key-value-stores" - # Special case because there are no key-value store how-tos yet - return f"""## Related +# Create a dictionary with key being the first element (integration_name) and value being the rest of the tuple +INTEGRATION_INFO_DICT = { + integration_name: rest for integration_name, *rest in INTEGRATION_INFO +} -- [{concept_display_name} conceptual guide](/docs/concepts/#{concept_heading}) +RELATED_LINKS_SECTION = """## Related +- {concept_display_name} [conceptual guide](/docs/concepts/{concept_page}) +- {concept_display_name} [how-to guides](/docs/how_to/#{how_to_fragment}) """ - else: + +RELATED_LINKS_WITHOUT_HOW_TO_SECTION = """## Related +- {concept_display_name} [conceptual guide](/docs/concepts/{concept_page}) +""" + + +def _generate_related_links_section( + integration_type: str, notebook_name: str +) -> Optional[str]: + if integration_type not in INTEGRATION_INFO_DICT: return None - return f"""## Related + concept_display_name, concept_page, how_to_fragment = INTEGRATION_INFO_DICT[ + integration_type + ] -- {concept_display_name} [conceptual guide](/docs/concepts/#{concept_heading}) -- {concept_display_name} [how-to 
guides](/docs/how_to/#{concept_heading}) -""" + # Special case because there are no key-value store how-tos yet + if integration_type == "stores": + return RELATED_LINKS_WITHOUT_HOW_TO_SECTION.format( + concept_display_name=concept_display_name, + concept_page=concept_page, + ) + + return RELATED_LINKS_SECTION.format( + concept_display_name=concept_display_name, + concept_page=concept_page, + how_to_fragment=how_to_fragment, + ) def _process_path(doc_path: Path): From 61897aef900ae02973298aefcc250d90586c2bcf Mon Sep 17 00:00:00 2001 From: hippopond <160084905+hippopond@users.noreply.github.com> Date: Thu, 24 Oct 2024 08:04:18 -0700 Subject: [PATCH 3/7] docs: Fix for spelling mistake (#27599) Fixes #26009 Thank you for contributing to LangChain! - [x] **PR title**: "docs: Correcting spelling mistake" - [x] **PR message**: - **Description:** Corrected spelling from "trianed" to "trained" - **Issue:** the issue #26009 - **Dependencies:** NA - **Twitter handle:** NA - [ ] **Add tests and docs**: NA - [ ] **Lint and test**: Co-authored-by: Libby Lin --- docs/docs/how_to/output_parser_xml.ipynb | 2 +- docs/docs/how_to/output_parser_yaml.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/how_to/output_parser_xml.ipynb b/docs/docs/how_to/output_parser_xml.ipynb index 0ae3d1ed0d4..d01b5990fed 100644 --- a/docs/docs/how_to/output_parser_xml.ipynb +++ b/docs/docs/how_to/output_parser_xml.ipynb @@ -18,7 +18,7 @@ "\n", ":::\n", "\n", - "LLMs from different providers often have different strengths depending on the specific data they are trianed on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n", + "LLMs from different providers often have different strengths depending on the specific data they are trained on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n", "\n", "This guide shows you how to use the [`XMLOutputParser`](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, then and parse that output into a usable format.\n", "\n", diff --git a/docs/docs/how_to/output_parser_yaml.ipynb b/docs/docs/how_to/output_parser_yaml.ipynb index fedc1f88fd8..392873538d3 100644 --- a/docs/docs/how_to/output_parser_yaml.ipynb +++ b/docs/docs/how_to/output_parser_yaml.ipynb @@ -18,7 +18,7 @@ "\n", ":::\n", "\n", - "LLMs from different providers often have different strengths depending on the specific data they are trianed on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n", + "LLMs from different providers often have different strengths depending on the specific data they are trained on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n", "\n", "This output parser allows users to specify an arbitrary schema and query LLMs for outputs that conform to that schema, using YAML to format their response.\n", "\n", From 133c1b4f7601883294b5124359c3712fcb250db2 Mon Sep 17 00:00:00 2001 From: QiQi <57396324+laududu-cc@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:05:06 +0800 Subject: [PATCH 4/7] docs: Update passthrough.ipynb -- Grammar correction (#27601) Grammar correction needed in passthrough.ipynb The sentence is: "Now you've learned how to pass data through your chains to help to help format the data flowing through your chains." 
There's a redundant "to help", and it could be more succinctly written as: "Now you've learned how to pass data through your chains to help format the data flowing through your chains." --- docs/docs/how_to/passthrough.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/how_to/passthrough.ipynb b/docs/docs/how_to/passthrough.ipynb index 6a1aa359187..ab4c22d0647 100644 --- a/docs/docs/how_to/passthrough.ipynb +++ b/docs/docs/how_to/passthrough.ipynb @@ -153,7 +153,7 @@ "\n", "## Next steps\n", "\n", - "Now you've learned how to pass data through your chains to help to help format the data flowing through your chains.\n", + "Now you've learned how to pass data through your chains to help format the data flowing through your chains.\n", "\n", "To learn more, see the other how-to guides on runnables in this section." ] From da6b5267708ad8a77c832e8a56ff5ee01c8d9716 Mon Sep 17 00:00:00 2001 From: Daniel Vu Dao Date: Thu, 24 Oct 2024 08:05:43 -0700 Subject: [PATCH 5/7] docs: Update `Runnable` documentation (#27606) **Description** Adds better code formatting for one of the docs. --- docs/docs/how_to/output_parser_structured.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/how_to/output_parser_structured.ipynb b/docs/docs/how_to/output_parser_structured.ipynb index aaff800f954..f205cd0b550 100644 --- a/docs/docs/how_to/output_parser_structured.ipynb +++ b/docs/docs/how_to/output_parser_structured.ipynb @@ -214,7 +214,7 @@ "id": "3ca23082-c602-4ee8-af8c-a185b1f42bd1", "metadata": {}, "source": [ - "While the PydanticOutputParser cannot:" + "While the `PydanticOutputParser` cannot:" ] }, { From 2d58a8a08de7bd1ae0cd788d38d164d4b89ec356 Mon Sep 17 00:00:00 2001 From: Adarsh Sahu <118628700+yaarAdarsh@users.noreply.github.com> Date: Thu, 24 Oct 2024 20:43:28 +0530 Subject: [PATCH 6/7] docs: Update structured_outputs.mdx (#27613) `strightforward` => `straightforward` `adavanced` => `advanced` `There a few challenges` => `There are a few challenges` Documentation Correction: * [`docs/docs/concepts/structured_output.mdx`]: Corrected several typos in the sentence directing users to the API reference. --- docs/docs/concepts/structured_outputs.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/docs/concepts/structured_outputs.mdx b/docs/docs/concepts/structured_outputs.mdx index f58150d5c60..a334ecc1276 100644 --- a/docs/docs/concepts/structured_outputs.mdx +++ b/docs/docs/concepts/structured_outputs.mdx @@ -66,7 +66,7 @@ Several more powerful methods that utilizes native features in the model provide Many [model providers support](/docs/integrations/chat/) tool calling, a concept discussed in more detail in our [tool calling guide](/docs/concepts/tool_calling/). In short, tool calling involves binding a tool to a model and, when appropriate, the model can *decide* to call this tool and ensure its response conforms to the tool's schema. -With this in mind, the central concept is strightforward: *simply bind our schema to a model as a tool!* +With this in mind, the central concept is straightforward: *simply bind our schema to a model as a tool!* Here is an example using the `ResponseFormatter` schema defined above: ```python @@ -106,7 +106,7 @@ ai_msg.content ``` One important point to flag: the model *still* returns a string, which needs to be parsed into a JSON object. -This can, of course, simply use the `json` library or a JSON output parser if you need more adavanced functionality. 
+This can, of course, simply use the `json` library or a JSON output parser if you need more advanced functionality. See this [how-to guide on the JSON output parser](/docs/how_to/output_parser_json) for more details. ```python @@ -117,7 +117,7 @@ json_object = json.loads(ai_msg.content) ## Structured output method -There a few challenges when producing structured output with the above methods: +There are a few challenges when producing structured output with the above methods: (1) If using tool calling, tool call arguments needs to be parsed from a dictionary back to the original schema. @@ -145,4 +145,4 @@ ResponseFormatter(answer="The powerhouse of the cell is the mitochondrion. Mitoc For more details on usage, see our [how-to guide](/docs/how_to/structured_output/#the-with_structured_output-method). -::: \ No newline at end of file +::: From 20b56a02331de1f4d7d18e79edec5b5c08b15274 Mon Sep 17 00:00:00 2001 From: Tibor Reiss <75096465+tibor-reiss@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:36:35 +0200 Subject: [PATCH 7/7] core[patch]: fix repr and str for Serializable (#26786) Fixes #26499 --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> --- libs/core/langchain_core/load/serializable.py | 33 ++++++------ .../unit_tests/load/test_serializable.py | 52 +++++++++++++++---- 2 files changed, 61 insertions(+), 24 deletions(-) diff --git a/libs/core/langchain_core/load/serializable.py b/libs/core/langchain_core/load/serializable.py index b5e7d8b9150..7655438be97 100644 --- a/libs/core/langchain_core/load/serializable.py +++ b/libs/core/langchain_core/load/serializable.py @@ -10,6 +10,7 @@ from typing import ( ) from pydantic import BaseModel, ConfigDict +from pydantic.fields import FieldInfo from typing_extensions import NotRequired @@ -77,10 +78,23 @@ def try_neq_default(value: Any, key: str, model: BaseModel) -> bool: Raises: Exception: If the key is not in the model. """ + field = model.model_fields[key] + return _try_neq_default(value, field) + + +def _try_neq_default(value: Any, field: FieldInfo) -> bool: + # Handle edge case: inequality of two objects does not evaluate to a bool (e.g. two + # Pandas DataFrames). try: - return model.model_fields[key].get_default() != value - except Exception: - return True + return bool(field.get_default() != value) + except Exception as _: + try: + return all(field.get_default() != value) + except Exception as _: + try: + return value is not field.default + except Exception as _: + return False class Serializable(BaseModel, ABC): @@ -297,18 +311,7 @@ def _is_field_useful(inst: Serializable, key: str, value: Any) -> bool: if field.default_factory is list and isinstance(value, list): return False - # Handle edge case: inequality of two objects does not evaluate to a bool (e.g. two - # Pandas DataFrames). 
- try: - value_neq_default = bool(field.get_default() != value) - except Exception as _: - try: - value_neq_default = all(field.get_default() != value) - except Exception as _: - try: - value_neq_default = value is not field.default - except Exception as _: - value_neq_default = False + value_neq_default = _try_neq_default(value, field) # If value is falsy and does not match the default return value_is_truthy or value_neq_default diff --git a/libs/core/tests/unit_tests/load/test_serializable.py b/libs/core/tests/unit_tests/load/test_serializable.py index 65040a6841b..1c8b6772f09 100644 --- a/libs/core/tests/unit_tests/load/test_serializable.py +++ b/libs/core/tests/unit_tests/load/test_serializable.py @@ -4,6 +4,22 @@ from langchain_core.load import Serializable, dumpd, load from langchain_core.load.serializable import _is_field_useful +class NonBoolObj: + def __bool__(self) -> bool: + msg = "Truthiness can't be determined" + raise ValueError(msg) + + def __eq__(self, other: object) -> bool: + msg = "Equality can't be determined" + raise ValueError(msg) + + def __str__(self) -> str: + return self.__class__.__name__ + + def __repr__(self) -> str: + return self.__class__.__name__ + + def test_simple_serialization() -> None: class Foo(Serializable): bar: int @@ -82,15 +98,6 @@ def test__is_field_useful() -> None: def __eq__(self, other: object) -> bool: return self # type: ignore[return-value] - class NonBoolObj: - def __bool__(self) -> bool: - msg = "Truthiness can't be determined" - raise ValueError(msg) - - def __eq__(self, other: object) -> bool: - msg = "Equality can't be determined" - raise ValueError(msg) - default_x = ArrayObj() default_y = NonBoolObj() @@ -169,3 +176,30 @@ def test_simple_deserialization_with_additional_imports() -> None: }, ) assert isinstance(new_foo, Foo2) + + +class Foo3(Serializable): + model_config = ConfigDict(arbitrary_types_allowed=True) + + content: str + non_bool: NonBoolObj + + @classmethod + def is_lc_serializable(cls) -> bool: + return True + + +def test_repr() -> None: + foo = Foo3( + content="repr", + non_bool=NonBoolObj(), + ) + assert repr(foo) == "Foo3(content='repr', non_bool=NonBoolObj)" + + +def test_str() -> None: + foo = Foo3( + content="str", + non_bool=NonBoolObj(), + ) + assert str(foo) == "content='str' non_bool=NonBoolObj"