Merge branch 'master' into jacob/ollama

2025-08-17 16:39:52 +00:00 · 2025-02-10 09:13:08 -08:00 · 2025-02-10 09:13:08 -08:00 · 88c437ec2c
commit 88c437ec2c
parent 4b6e10a413 624216aa64
5 changed files with 25 additions and 10 deletions
--- a/docs/docs/how_to/functions.ipynb
+++ b/docs/docs/how_to/functions.ipynb
@ -99,8 +99,6 @@
    "\n",
    "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n",
    "\n",
-    "chain1 = prompt | model\n",
-    "\n",
    "chain = (\n",
    "    {\n",
    "        \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n",
--- a/docs/docs/integrations/vectorstores/vearch.ipynb
+++ b/docs/docs/integrations/vectorstores/vearch.ipynb
@ -156,6 +156,15 @@
    "    db_name=\"vearch_cluster_langchian\",\n",
    "    table_name=\"tobenumone\",\n",
    "    flag=1,\n",
+    ")\n",
+    "\n",
+    "# The vector data is usually already initialized, so we don’t need the document parameter and can directly create the object.\n",
+    "vearch_cluster_b = Vearch(\n",
+    "    embeddings,\n",
+    "    path_or_url=\"http://test-vearch-langchain-router.vectorbase.svc.ht1.n.jd.local\",\n",
+    "    db_name=\"vearch_cluster_langchian\",\n",
+    "    table_name=\"tobenumone\",\n",
+    "    flag=1,\n",
    ")"
   ]
  },
@ -244,6 +253,7 @@
   ],
   "source": [
    "query = \"你知道凌波微步吗，你知道都有谁会凌波微步?\"\n",
+    "# The second parameter is the top-n to retrieve, and its default value is 4.\n",
    "vearch_standalone_res = vearch_standalone.similarity_search(query, 3)\n",
    "for idx, tmp in enumerate(vearch_standalone_res):\n",
    "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
@ -261,6 +271,11 @@
    "for idx, tmp in enumerate(cluster_res):\n",
    "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
    "\n",
+    "# In practical applications, we usually limit the boundary value of similarity. The following method can set this value.\n",
+    "cluster_res_with_bound = vearch_cluster.similarity_search_with_score(\n",
+    "    query=query_c, k=3, min_score=0.5\n",
+    ")\n",
+    "\n",
    "# combine your local knowleadge and query\n",
    "context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n",
    "new_query_c = f\"基于以下信息，尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n",
--- a/docs/docs/tutorials/classification.ipynb
+++ b/docs/docs/tutorials/classification.ipynb
@ -154,7 +154,7 @@
   "id": "ff3cf30d",
   "metadata": {},
   "source": [
-    "If we want dictionary output, we can just call `.dict()`"
+    "If we want dictionary output, we can just call `.model_dump()`"
   ]
  },
  {
@ -179,7 +179,7 @@
    "prompt = tagging_prompt.invoke({\"input\": inp})\n",
    "response = llm.invoke(prompt)\n",
    "\n",
-    "response.dict()"
+    "response.model_dump()"
   ]
  },
  {
--- a/libs/community/langchain_community/document_loaders/youtube.py
+++ b/libs/community/langchain_community/document_loaders/youtube.py
@ -392,11 +392,11 @@ class GoogleApiYoutubeLoader(BaseLoader):

    @model_validator(mode="before")
    @classmethod
-    def validate_channel_or_videoIds_is_set(cls, values: Dict[str, Any]) -> Any:
+    def validate_channel_or_videoIds_is_set(cls, values: Any) -> Any:
        """Validate that either folder_id or document_ids is set, but not both."""
-        if not values.get("channel_name") and not values.get("video_ids"):
+        if not values.kwargs.get("channel_name") and not values.kwargs.get("video_ids"):
            raise ValueError("Must specify either channel_name or video_ids")
-        return values
+        return values.kwargs

    def _get_transcripe_for_video_id(self, video_id: str) -> str:
        from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi
--- a/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py
+++ b/libs/community/langchain_community/retrievers/pinecone_hybrid_search.py
@ -31,6 +31,7 @@ def create_index(
    ids: Optional[List[str]] = None,
    metadatas: Optional[List[dict]] = None,
    namespace: Optional[str] = None,
+    text_key: str = "context",
 ) -> None:
    """Create an index from a list of contexts.

@ -69,7 +70,7 @@ def create_index(
        )
        # add context passages as metadata
        meta = [
-            {"context": context, **metadata}
+            {text_key: context, **metadata}
            for context, metadata in zip(context_batch, metadata_batch)
        ]

@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
    """Alpha value for hybrid search."""
    namespace: Optional[str] = None
    """Namespace value for index partition."""
-
+    text_key: str = "context"
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
            ids=ids,
            metadatas=metadatas,
            namespace=namespace,
+            text_key=self.text_key,
        )

    @pre_init
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
        )
        final_result = []
        for res in result["matches"]:
-            context = res["metadata"].pop("context")
+            context = res["metadata"].pop(self.text_key)
            metadata = res["metadata"]
            if "score" not in metadata and "score" in res:
                metadata["score"] = res["score"]