Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-17 10:13:29 +00:00)
Merge branch 'master' into pprados/06-pdfplumber
This commit is contained in: commit 7733591803
@@ -99,8 +99,6 @@
     "\n",
     "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n",
     "\n",
-    "chain1 = prompt | model\n",
-    "\n",
     "chain = (\n",
     "    {\n",
     "        \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n",
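For context on the cell kept above: it pipes an input key through `itemgetter` and `RunnableLambda`. A minimal, hedged sketch of that composition step on its own (standalone, no chat model; `length_function` is assumed to match the notebook's definition):

```python
from operator import itemgetter

from langchain_core.runnables import RunnableLambda


def length_function(text: str) -> int:
    """Return the length of the text, as in the notebook."""
    return len(text)


# Pipe the "foo" key of the input dict into length_function; the `|` operator
# coerces itemgetter into a Runnable, mirroring the context line above.
step = itemgetter("foo") | RunnableLambda(length_function)
print(step.invoke({"foo": "bar"}))  # 3
```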
@@ -27,18 +27,12 @@
     "If you'd like to learn more about Nimble, visit us at [nimbleway.com](https://www.nimbleway.com/).\n",
     "\n",
     "\n",
-    "## Currently we expose the following components\n",
-    "\n",
-    "* **Retriever** - Allow us to query the internet and get parsed textual results utilizing several search engines.\n",
-    "\n",
-    "\n"
+    "## Retrievers:"
    ]
   },
   {
    "cell_type": "markdown",
-   "source": [
-    "## Usage"
-   ],
+   "source": "### NimbleSearchRetriever",
    "metadata": {
     "id": "AuMFgVFrKbNH"
    },
@@ -47,7 +41,9 @@
   {
    "cell_type": "markdown",
    "source": [
-    "In order to use our provider you have to provide an API key like so"
+    "Enables developers to build RAG applications and AI Agents that can search, access, and retrieve online information from anywhere on the web.\n",
+    "\n",
+    "We need to install the `langchain-nimble` python package."
    ],
    "metadata": {
     "id": "sFlPjZX9KdK6"
@@ -55,25 +51,32 @@
     "id": "sFlPjZX9KdK6"
    },
   {
+   "metadata": {},
    "cell_type": "code",
-   "source": [
-    "import getpass\n",
-    "import os\n",
-    "\n",
-    "os.environ[\"NIMBLE_API_KEY\"] = getpass.getpass()"
-   ],
-   "metadata": {
-    "id": "eAqSHZ-Z8R3F"
-   },
-   "id": "eAqSHZ-Z8R3F",
+   "outputs": [],
    "execution_count": null,
-   "outputs": []
+   "source": "%pip install -U langchain-nimble",
+   "id": "65f237c852aa3885"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "See a [usage example](/docs/integrations/retrievers/nimble/).",
+   "id": "77bd7b9a6a8e381b"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "```python\n",
+    "from langchain_nimble import NimbeSearchRetriever\n",
+    "```"
+   ],
+   "id": "511f9d569c21a5d2"
   },
   {
    "cell_type": "markdown",
-   "source": [
-    "For more information about the Authentication process, see [Nimble APIs Authentication Documentation](https://docs.nimbleway.com/nimble-sdk/web-api/nimble-web-api-quick-start-guide/nimble-apis-authentication)."
-   ],
+   "source": "Note that authentication is required, please refer to the [Setup section in the documentation](/docs/integrations/retrievers/nimble/#setup).",
    "metadata": {
     "id": "WfwnI_RS8PO5"
    },
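The added integration cells only show the install command and the import. A hedged illustration of how the retriever would then be used, assuming `langchain-nimble` is installed and a valid `NIMBLE_API_KEY`; the constructor arguments are assumptions, not the package's documented defaults:

```python
import getpass
import os

# Class name as in the heading above ("NimbleSearchRetriever"); note the
# added code cell spells the import "NimbeSearchRetriever".
from langchain_nimble import NimbleSearchRetriever

# Authentication, as required by the Setup section referenced above.
os.environ["NIMBLE_API_KEY"] = getpass.getpass()

retriever = NimbleSearchRetriever()  # constructor options are an assumption
docs = retriever.invoke("What is LangChain?")  # standard BaseRetriever interface
for doc in docs:
    print(doc.page_content[:100])
```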
File diff suppressed because one or more lines are too long
@@ -156,6 +156,15 @@
     "    db_name=\"vearch_cluster_langchian\",\n",
     "    table_name=\"tobenumone\",\n",
     "    flag=1,\n",
+    ")\n",
+    "\n",
+    "# The vector data is usually already initialized, so we don’t need the document parameter and can directly create the object.\n",
+    "vearch_cluster_b = Vearch(\n",
+    "    embeddings,\n",
+    "    path_or_url=\"http://test-vearch-langchain-router.vectorbase.svc.ht1.n.jd.local\",\n",
+    "    db_name=\"vearch_cluster_langchian\",\n",
+    "    table_name=\"tobenumone\",\n",
+    "    flag=1,\n",
     ")"
    ]
   },
@@ -244,6 +253,7 @@
    ],
    "source": [
     "query = \"你知道凌波微步吗,你知道都有谁会凌波微步?\"\n",
+    "# The second parameter is the top-n to retrieve, and its default value is 4.\n",
     "vearch_standalone_res = vearch_standalone.similarity_search(query, 3)\n",
     "for idx, tmp in enumerate(vearch_standalone_res):\n",
     "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
@@ -261,6 +271,11 @@
     "for idx, tmp in enumerate(cluster_res):\n",
     "    print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
     "\n",
+    "# In practical applications, we usually limit the boundary value of similarity. The following method can set this value.\n",
+    "cluster_res_with_bound = vearch_cluster.similarity_search_with_score(\n",
+    "    query=query_c, k=3, min_score=0.5\n",
+    ")\n",
+    "\n",
     "# combine your local knowleadge and query\n",
     "context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n",
     "new_query_c = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n",
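A hedged, standalone restatement of the score-bounded lookup those added cells introduce; `vearch_cluster` is assumed to be an already-initialized `Vearch` store, and the `(Document, score)` result shape follows the usual LangChain vector store contract rather than anything stated in this diff:

```python
# Assumes `vearch_cluster` was constructed as in the notebook above.
results = vearch_cluster.similarity_search_with_score(
    query="你知道凌波微步吗,你知道都有谁会凌波微步?",
    k=3,            # top-n passages to return
    min_score=0.5,  # drop matches below this similarity boundary
)
for doc, score in results:  # (Document, score) pairs, per the common contract
    print(score, doc.page_content[:80])
```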
@@ -154,7 +154,7 @@
    "id": "ff3cf30d",
    "metadata": {},
    "source": [
-    "If we want dictionary output, we can just call `.dict()`"
+    "If we want dictionary output, we can just call `.model_dump()`"
    ]
   },
   {
@@ -179,7 +179,7 @@
     "prompt = tagging_prompt.invoke({\"input\": inp})\n",
     "response = llm.invoke(prompt)\n",
     "\n",
-    "response.dict()"
+    "response.model_dump()"
    ]
   },
   {
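Both hunks track the Pydantic v2 rename of `.dict()` to `.model_dump()`. A minimal, self-contained illustration of the renamed call (the `Classification` model here is a stand-in, not the notebook's exact schema):

```python
from pydantic import BaseModel


class Classification(BaseModel):
    # Stand-in fields for the structured output returned by the tagging chain.
    sentiment: str
    aggressiveness: int


response = Classification(sentiment="positive", aggressiveness=1)
print(response.model_dump())  # {'sentiment': 'positive', 'aggressiveness': 1}
```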
@@ -64,7 +64,7 @@ pdfplumber>=0.11
 pgvector>=0.1.6,<0.2
 playwright>=1.48.0,<2
 praw>=7.7.1,<8
-premai>=0.3.25,<0.4
+premai>=0.3.25,<0.4,!=0.3.100
 psychicapi>=0.8.0,<0.9
 pydantic>=2.7.4,<3
 pytesseract>=0.3.13
@@ -392,11 +392,11 @@ class GoogleApiYoutubeLoader(BaseLoader):

     @model_validator(mode="before")
     @classmethod
-    def validate_channel_or_videoIds_is_set(cls, values: Dict[str, Any]) -> Any:
+    def validate_channel_or_videoIds_is_set(cls, values: Any) -> Any:
         """Validate that either folder_id or document_ids is set, but not both."""
-        if not values.get("channel_name") and not values.get("video_ids"):
+        if not values.kwargs.get("channel_name") and not values.kwargs.get("video_ids"):
             raise ValueError("Must specify either channel_name or video_ids")
-        return values
+        return values.kwargs

     def _get_transcripe_for_video_id(self, video_id: str) -> str:
         from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi
@@ -31,6 +31,7 @@ def create_index(
     ids: Optional[List[str]] = None,
     metadatas: Optional[List[dict]] = None,
     namespace: Optional[str] = None,
+    text_key: str = "context",
 ) -> None:
     """Create an index from a list of contexts.

|
|||||||
)
|
)
|
||||||
# add context passages as metadata
|
# add context passages as metadata
|
||||||
meta = [
|
meta = [
|
||||||
{"context": context, **metadata}
|
{text_key: context, **metadata}
|
||||||
for context, metadata in zip(context_batch, metadata_batch)
|
for context, metadata in zip(context_batch, metadata_batch)
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
"""Alpha value for hybrid search."""
|
"""Alpha value for hybrid search."""
|
||||||
namespace: Optional[str] = None
|
namespace: Optional[str] = None
|
||||||
"""Namespace value for index partition."""
|
"""Namespace value for index partition."""
|
||||||
|
text_key: str = "context"
|
||||||
model_config = ConfigDict(
|
model_config = ConfigDict(
|
||||||
arbitrary_types_allowed=True,
|
arbitrary_types_allowed=True,
|
||||||
extra="forbid",
|
extra="forbid",
|
||||||
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
ids=ids,
|
ids=ids,
|
||||||
metadatas=metadatas,
|
metadatas=metadatas,
|
||||||
namespace=namespace,
|
namespace=namespace,
|
||||||
|
text_key=self.text_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
@pre_init
|
@pre_init
|
||||||
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
)
|
)
|
||||||
final_result = []
|
final_result = []
|
||||||
for res in result["matches"]:
|
for res in result["matches"]:
|
||||||
context = res["metadata"].pop("context")
|
context = res["metadata"].pop(self.text_key)
|
||||||
metadata = res["metadata"]
|
metadata = res["metadata"]
|
||||||
if "score" not in metadata and "score" in res:
|
if "score" not in metadata and "score" in res:
|
||||||
metadata["score"] = res["score"]
|
metadata["score"] = res["score"]
|
||||||
|
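Taken together, these hunks thread a configurable `text_key` through index creation and result parsing in place of the hard-coded `"context"` metadata field. A hedged sketch of how a caller would set it; the embeddings, sparse encoder, and Pinecone index objects are placeholders, not created here:

```python
from langchain_community.retrievers import PineconeHybridSearchRetriever

# Placeholders: any Embeddings implementation, a fitted sparse encoder
# (e.g. a BM25 encoder), and an existing pinecone.Index are assumed.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=sparse_encoder,
    index=index,
    text_key="passage_text",  # metadata field that stores the raw passage
)
retriever.add_texts(["first passage", "second passage"])
docs = retriever.invoke("first")
```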
@@ -70,7 +70,7 @@ DEFAULT_PROPERTIES = [
 DEFAULT_LANG_CODE = "en"
 WIKIDATA_USER_AGENT = "langchain-wikidata"
 WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
-WIKIDATA_REST_API_URL = "https://www.wikidata.org/w/rest.php/wikibase/v0/"
+WIKIDATA_REST_API_URL = "https://www.wikidata.org/w/rest.php/wikibase/v1/"


 class WikidataAPIWrapper(BaseModel):
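The only change here bumps the Wikibase REST endpoint from v0 to v1. As a hedged reminder of how the wrapper is exercised (it needs the `mediawikiapi` and `wikibase-rest-api-client` extras installed; the query string is arbitrary):

```python
from langchain_community.utilities.wikidata import WikidataAPIWrapper

# Resolves entities via the Wikidata action API and fetches details through
# the REST endpoint configured by WIKIDATA_REST_API_URL.
wikidata = WikidataAPIWrapper()
print(wikidata.run("Alan Turing"))
```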