From c36e6d4371750037b156f76549519db0692b7b6f Mon Sep 17 00:00:00 2001
From: Edmond Wang <edmondsky@hotmail.com>
Date: Mon, 10 Feb 2025 21:35:38 +0800
Subject: [PATCH 1/5] =?UTF-8?q?docs:=20Add=20Comments=20and=20Supplementar?=
 =?UTF-8?q?y=20Example=20Code=20to=20Vearch=20Vector=20Dat=E2=80=A6=20(#29?=
 =?UTF-8?q?706)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- **Description:** Added some comments to the example code in the Vearch
vector database documentation and included commonly used sample code.
- **Issue:** None
- **Dependencies:** None

---------

Co-authored-by: wangchuxiong <wangchuxiong@jd.com>
---
 docs/docs/integrations/vectorstores/vearch.ipynb | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/docs/integrations/vectorstores/vearch.ipynb b/docs/docs/integrations/vectorstores/vearch.ipynb
index da2efb893a7..d5e40fdc4c1 100644
--- a/docs/docs/integrations/vectorstores/vearch.ipynb
+++ b/docs/docs/integrations/vectorstores/vearch.ipynb
@@ -156,6 +156,15 @@
     "    db_name=\"vearch_cluster_langchian\",\n",
     "    table_name=\"tobenumone\",\n",
     "    flag=1,\n",
+    ")\n",
+    "\n",
+    "# The vector data is usually already initialized, so we don’t need the document parameter and can directly create the object.\n",
+    "vearch_cluster_b = Vearch(\n",
+    "    embeddings,\n",
+    "    path_or_url=\"http://test-vearch-langchain-router.vectorbase.svc.ht1.n.jd.local\",\n",
+    "    db_name=\"vearch_cluster_langchian\",\n",
+    "    table_name=\"tobenumone\",\n",
+    "    flag=1,\n",
     ")"
    ]
   },
@@ -244,6 +253,7 @@
    ],
    "source": [
     "query = \"你知道凌波微步吗，你知道都有谁会凌波微步?\"\n",
+    "# The second parameter is the top-n to retrieve, and its default value is 4.\n",
     "vearch_standalone_res = vearch_standalone.similarity_search(query, 3)\n",
     "for idx, tmp in enumerate(vearch_standalone_res):\n",
     "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
@@ -261,6 +271,11 @@
     "for idx, tmp in enumerate(cluster_res):\n",
     "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
     "\n",
+    "# In practical applications, we usually limit the boundary value of similarity. The following method can set this value.\n",
+    "cluster_res_with_bound = vearch_cluster.similarity_search_with_score(\n",
+    "    query=query_c, k=3, min_score=0.5\n",
+    ")\n",
+    "\n",
     "# combine your local knowleadge and query\n",
     "context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n",
     "new_query_c = f\"基于以下信息，尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n",

From 6655246504085d7d9d68505fba30d1f14262eaad Mon Sep 17 00:00:00 2001
From: Tiest van Gool <tiestvangool@icloud.com>
Date: Mon, 10 Feb 2025 06:38:15 -0700
Subject: [PATCH 2/5] Classification Tutorial: Replaced .dict() with
 .model_dump() method (#29701)

The `.dict()` method is deprecated in Pydantic V2.0; use the
`.model_dump()` method instead.

Thank you for contributing to LangChain!

- [ ] **PR title**: "package: description"
- Where "package" is whichever of langchain, community, core, etc. is
being modified. Use "docs: ..." for purely docs changes, "infra: ..."
for CI changes.
  - Example: "community: add foobar LLM"


- [ ] **PR message**: ***Delete this entire checklist*** and replace
with
    - **Description:** a description of the change
    - **Issue:** the issue # it fixes, if applicable
    - **Dependencies:** any dependencies required for this change
- **Twitter handle:** if your PR gets announced, and you'd like a
mention, we'll gladly shout you out!


- [ ] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [ ] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
---
 docs/docs/tutorials/classification.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/docs/tutorials/classification.ipynb b/docs/docs/tutorials/classification.ipynb
index b61ab56e3c3..4efafa165c7 100644
--- a/docs/docs/tutorials/classification.ipynb
+++ b/docs/docs/tutorials/classification.ipynb
@@ -154,7 +154,7 @@
    "id": "ff3cf30d",
    "metadata": {},
    "source": [
-    "If we want dictionary output, we can just call `.dict()`"
+    "If we want dictionary output, we can just call `.model_dump()`"
    ]
   },
   {
@@ -179,7 +179,7 @@
     "prompt = tagging_prompt.invoke({\"input\": inp})\n",
     "response = llm.invoke(prompt)\n",
     "\n",
-    "response.dict()"
+    "response.model_dump()"
    ]
   },
   {

From 894b0cac3c84ca9a0b6fa6e8d1f5e738fa32cdcc Mon Sep 17 00:00:00 2001
From: Jun He <junhe@users.noreply.github.com>
Date: Mon, 10 Feb 2025 07:53:21 -0600
Subject: [PATCH 3/5] docs: Remove redundant line (#29698)

If I understand it correctly, chain1 is never used.
---
 docs/docs/how_to/functions.ipynb | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/docs/how_to/functions.ipynb b/docs/docs/how_to/functions.ipynb
index 9a37eda942f..6ffec14c30a 100644
--- a/docs/docs/how_to/functions.ipynb
+++ b/docs/docs/how_to/functions.ipynb
@@ -99,8 +99,6 @@
     "\n",
     "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n",
     "\n",
-    "chain1 = prompt | model\n",
-    "\n",
     "chain = (\n",
     "    {\n",
     "        \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n",

From 60740c44c53441a1e8d16fbd17cb162cc03b308e Mon Sep 17 00:00:00 2001
From: Changyong Um <e7217@naver.com>
Date: Mon, 10 Feb 2025 22:56:37 +0900
Subject: [PATCH 4/5] community: Add configurable text key for indexing and the
 retriever in Pinecone Hybrid Search (#29697)

**issue**

In LangChain, the original content is generally stored under the `text`
key. However, the `PineconeHybridSearchRetriever` stores and looks up the
content under a hard-coded `context` key in the metadata, and this key
could not be changed. To address this, I have modified the code so that
the key can be set to something other than `context`.

In my opinion, following Langchain's conventions, the `text` key seems
more appropriate than `context`. However, since I wasn't sure about the
author's intent, I have left the default value as `context`.
---
 .../retrievers/pinecone_hybrid_search.py                  | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py b/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py
index a6e0f68002d..cd3e3e96d08 100644
--- a/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py
+++ b/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py
@@ -31,6 +31,7 @@ def create_index(
     ids: Optional[List[str]] = None,
     metadatas: Optional[List[dict]] = None,
     namespace: Optional[str] = None,
+    text_key: str = "context",
 ) -> None:
     """Create an index from a list of contexts.
 
@@ -69,7 +70,7 @@ def create_index(
         )
         # add context passages as metadata
         meta = [
-            {"context": context, **metadata}
+            {text_key: context, **metadata}
             for context, metadata in zip(context_batch, metadata_batch)
         ]
 
@@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
     """Alpha value for hybrid search."""
     namespace: Optional[str] = None
     """Namespace value for index partition."""
-
+    text_key: str = "context"
     model_config = ConfigDict(
         arbitrary_types_allowed=True,
         extra="forbid",
@@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
             ids=ids,
             metadatas=metadatas,
             namespace=namespace,
+            text_key=self.text_key,
         )
 
     @pre_init
@@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
         )
         final_result = []
         for res in result["matches"]:
-            context = res["metadata"].pop("context")
+            context = res["metadata"].pop(self.text_key)
             metadata = res["metadata"]
             if "score" not in metadata and "score" in res:
                 metadata["score"] = res["score"]

From 624216aa64cc48e5a0f1db5f65d5c2dc8581f0f1 Mon Sep 17 00:00:00 2001
From: Bhav Sardana <40908961+sardanabhav@users.noreply.github.com>
Date: Mon, 10 Feb 2025 19:27:58 +0530
Subject: [PATCH 5/5] community:Fix for Pydantic model validator of
 GoogleApiYoutubeLoader (#29694)

- **Description:** Community: bugfix for the Pydantic model validator of
GoogleApiYoutubeLoader
- **Issue:** #29165, #27432
Fix is similar to #29346
---
 .../langchain_community/document_loaders/youtube.py         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/youtube.py b/libs/community/langchain_community/document_loaders/youtube.py
index 64871eb3caf..1b99a8d2da2 100644
--- a/libs/community/langchain_community/document_loaders/youtube.py
+++ b/libs/community/langchain_community/document_loaders/youtube.py
@@ -392,11 +392,11 @@ class GoogleApiYoutubeLoader(BaseLoader):
 
     @model_validator(mode="before")
     @classmethod
-    def validate_channel_or_videoIds_is_set(cls, values: Dict[str, Any]) -> Any:
+    def validate_channel_or_videoIds_is_set(cls, values: Any) -> Any:
         """Validate that either folder_id or document_ids is set, but not both."""
-        if not values.get("channel_name") and not values.get("video_ids"):
+        if not values.kwargs.get("channel_name") and not values.kwargs.get("video_ids"):
             raise ValueError("Must specify either channel_name or video_ids")
-        return values
+        return values.kwargs
 
     def _get_transcripe_for_video_id(self, video_id: str) -> str:
         from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi