community: Add configurable text key for indexing and the retriever in Pinecone Hybrid Search (#29697)

**issue**

In LangChain, the original content is generally stored under the `text`
key. However, the `PineconeHybridSearchRetriever` reads the `context`
field in the metadata, and this key cannot be changed. To address this, I
have modified the code so that the key can be set to something other than
`context`.

In my opinion, following LangChain's conventions, the `text` key seems
more appropriate than `context`. However, since I wasn't sure about the
author's intent, I have left the default value as `context`.
This commit is contained in:
Changyong Um 2025-02-10 22:56:37 +09:00 committed by GitHub
parent 894b0cac3c
commit 60740c44c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -31,6 +31,7 @@ def create_index(
ids: Optional[List[str]] = None, ids: Optional[List[str]] = None,
metadatas: Optional[List[dict]] = None, metadatas: Optional[List[dict]] = None,
namespace: Optional[str] = None, namespace: Optional[str] = None,
text_key: str = "context",
) -> None: ) -> None:
"""Create an index from a list of contexts. """Create an index from a list of contexts.
@ -69,7 +70,7 @@ def create_index(
) )
# add context passages as metadata # add context passages as metadata
meta = [ meta = [
{"context": context, **metadata} {text_key: context, **metadata}
for context, metadata in zip(context_batch, metadata_batch) for context, metadata in zip(context_batch, metadata_batch)
] ]
@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
"""Alpha value for hybrid search.""" """Alpha value for hybrid search."""
namespace: Optional[str] = None namespace: Optional[str] = None
"""Namespace value for index partition.""" """Namespace value for index partition."""
text_key: str = "context"
model_config = ConfigDict( model_config = ConfigDict(
arbitrary_types_allowed=True, arbitrary_types_allowed=True,
extra="forbid", extra="forbid",
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
ids=ids, ids=ids,
metadatas=metadatas, metadatas=metadatas,
namespace=namespace, namespace=namespace,
text_key=self.text_key,
) )
@pre_init @pre_init
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
) )
final_result = [] final_result = []
for res in result["matches"]: for res in result["matches"]:
context = res["metadata"].pop("context") context = res["metadata"].pop(self.text_key)
metadata = res["metadata"] metadata = res["metadata"]
if "score" not in metadata and "score" in res: if "score" not in metadata and "score" in res:
metadata["score"] = res["score"] metadata["score"] = res["score"]