Merge branch 'master' into jacob/ollama

This commit is contained in:
Erick Friis 2025-02-10 09:13:08 -08:00
commit 88c437ec2c
5 changed files with 25 additions and 10 deletions

View File

@ -99,8 +99,6 @@
"\n", "\n",
"prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n", "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n",
"\n", "\n",
"chain1 = prompt | model\n",
"\n",
"chain = (\n", "chain = (\n",
" {\n", " {\n",
" \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n", " \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n",

View File

@ -156,6 +156,15 @@
" db_name=\"vearch_cluster_langchian\",\n", " db_name=\"vearch_cluster_langchian\",\n",
" table_name=\"tobenumone\",\n", " table_name=\"tobenumone\",\n",
" flag=1,\n", " flag=1,\n",
")\n",
"\n",
"# The vector data is usually already initialized, so we don't need the document parameter and can directly create the object.\n",
"vearch_cluster_b = Vearch(\n",
" embeddings,\n",
" path_or_url=\"http://test-vearch-langchain-router.vectorbase.svc.ht1.n.jd.local\",\n",
" db_name=\"vearch_cluster_langchian\",\n",
" table_name=\"tobenumone\",\n",
" flag=1,\n",
")" ")"
] ]
}, },
@ -244,6 +253,7 @@
], ],
"source": [ "source": [
"query = \"你知道凌波微步吗,你知道都有谁会凌波微步?\"\n", "query = \"你知道凌波微步吗,你知道都有谁会凌波微步?\"\n",
"# The second parameter is the top-n to retrieve, and its default value is 4.\n",
"vearch_standalone_res = vearch_standalone.similarity_search(query, 3)\n", "vearch_standalone_res = vearch_standalone.similarity_search(query, 3)\n",
"for idx, tmp in enumerate(vearch_standalone_res):\n", "for idx, tmp in enumerate(vearch_standalone_res):\n",
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n", " print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
@ -261,6 +271,11 @@
"for idx, tmp in enumerate(cluster_res):\n", "for idx, tmp in enumerate(cluster_res):\n",
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n", " print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
"\n", "\n",
"# In practical applications, we usually limit the boundary value of similarity. The following method can set this value.\n",
"cluster_res_with_bound = vearch_cluster.similarity_search_with_score(\n",
" query=query_c, k=3, min_score=0.5\n",
")\n",
"\n",
"# combine your local knowledge and query\n", "# combine your local knowledge and query\n",
"context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n", "context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n",
"new_query_c = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n", "new_query_c = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n",

View File

@ -154,7 +154,7 @@
"id": "ff3cf30d", "id": "ff3cf30d",
"metadata": {}, "metadata": {},
"source": [ "source": [
"If we want dictionary output, we can just call `.dict()`" "If we want dictionary output, we can just call `.model_dump()`"
] ]
}, },
{ {
@ -179,7 +179,7 @@
"prompt = tagging_prompt.invoke({\"input\": inp})\n", "prompt = tagging_prompt.invoke({\"input\": inp})\n",
"response = llm.invoke(prompt)\n", "response = llm.invoke(prompt)\n",
"\n", "\n",
"response.dict()" "response.model_dump()"
] ]
}, },
{ {

View File

@ -392,11 +392,11 @@ class GoogleApiYoutubeLoader(BaseLoader):
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def validate_channel_or_videoIds_is_set(cls, values: Dict[str, Any]) -> Any: def validate_channel_or_videoIds_is_set(cls, values: Any) -> Any:
"""Validate that at least one of channel_name or video_ids is set.""" """Validate that at least one of channel_name or video_ids is set."""
if not values.get("channel_name") and not values.get("video_ids"): if not values.kwargs.get("channel_name") and not values.kwargs.get("video_ids"):
raise ValueError("Must specify either channel_name or video_ids") raise ValueError("Must specify either channel_name or video_ids")
return values return values.kwargs
def _get_transcripe_for_video_id(self, video_id: str) -> str: def _get_transcripe_for_video_id(self, video_id: str) -> str:
from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi

View File

@ -31,6 +31,7 @@ def create_index(
ids: Optional[List[str]] = None, ids: Optional[List[str]] = None,
metadatas: Optional[List[dict]] = None, metadatas: Optional[List[dict]] = None,
namespace: Optional[str] = None, namespace: Optional[str] = None,
text_key: str = "context",
) -> None: ) -> None:
"""Create an index from a list of contexts. """Create an index from a list of contexts.
@ -69,7 +70,7 @@ def create_index(
) )
# add context passages as metadata # add context passages as metadata
meta = [ meta = [
{"context": context, **metadata} {text_key: context, **metadata}
for context, metadata in zip(context_batch, metadata_batch) for context, metadata in zip(context_batch, metadata_batch)
] ]
@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
"""Alpha value for hybrid search.""" """Alpha value for hybrid search."""
namespace: Optional[str] = None namespace: Optional[str] = None
"""Namespace value for index partition.""" """Namespace value for index partition."""
text_key: str = "context"
model_config = ConfigDict( model_config = ConfigDict(
arbitrary_types_allowed=True, arbitrary_types_allowed=True,
extra="forbid", extra="forbid",
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
ids=ids, ids=ids,
metadatas=metadatas, metadatas=metadatas,
namespace=namespace, namespace=namespace,
text_key=self.text_key,
) )
@pre_init @pre_init
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
) )
final_result = [] final_result = []
for res in result["matches"]: for res in result["matches"]:
context = res["metadata"].pop("context") context = res["metadata"].pop(self.text_key)
metadata = res["metadata"] metadata = res["metadata"]
if "score" not in metadata and "score" in res: if "score" not in metadata and "score" in res:
metadata["score"] = res["score"] metadata["score"] = res["score"]